/* armv8-curve25519
 *
 * Copyright (C) 2006-2025 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

#include <wolfssl/wolfcrypt/libwolfssl_sources_asm.h>

/* Generated using (from wolfssl):
 *   cd ../scripts
 *   ruby ./x25519/x25519.rb arm64 \
 *       ../wolfssl/wolfcrypt/src/port/arm/armv8-curve25519.S
 */
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#ifndef WOLFSSL_ARMASM_INLINE
#if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
#if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL)
/* fe_init: no-op entry point.
 *
 * Returns to the caller immediately. It reads and writes no registers and
 * no memory.
 */
#ifndef __APPLE__
.text
.globl	fe_init
.type	fe_init,@function
.align	2
fe_init:
#else
.section	__TEXT,__text
.globl	_fe_init
.p2align	2
_fe_init:
#endif /* __APPLE__ */
	ret
#ifndef __APPLE__
	.size	fe_init,.-fe_init
#endif /* __APPLE__ */
#ifdef HAVE_ED25519
/* fe_frombytes: copy a 32-byte value, clearing bit 255.
 *
 *   x0 - destination: four 64-bit words
 *   x1 - source: 32 bytes, read as four 64-bit words
 *
 * The word at offset 24 has its top bit cleared; the other three words are
 * copied unchanged. Scratch registers: x2-x5. Flags are not modified.
 */
#ifndef __APPLE__
.text
.globl	fe_frombytes
.type	fe_frombytes,@function
.align	2
fe_frombytes:
#else
.section	__TEXT,__text
.globl	_fe_frombytes
.p2align	2
_fe_frombytes:
#endif /* __APPLE__ */
	/* Read everything before writing anything, so x0 may overlap x1. */
	ldp	x4, x5, [x1, #16]
	ldp	x2, x3, [x1]
	/* Drop bit 63 of the last word (bit 255 of the value). */
	and	x5, x5, #0x7fffffffffffffff
	stp	x4, x5, [x0, #16]
	stp	x2, x3, [x0]
	ret
#ifndef __APPLE__
	.size	fe_frombytes,.-fe_frombytes
#endif /* __APPLE__ */
/* fe_tobytes: write the value at x1 to x0 after one conditional reduction.
 *
 *   x0 - destination: four 64-bit words
 *   x1 - source: four 64-bit words, least significant first
 *
 * Computes a + 19 only to inspect bit 255 of that sum. When the bit is set,
 * 19 is added to a; in either case bit 255 is cleared before storing. Adding
 * 19 and dropping bit 255 is the same as subtracting 2^255 - 19.
 *
 * Scratch registers: x2-x7. Flags are clobbered. There are no branches.
 */
#ifndef __APPLE__
.text
.globl	fe_tobytes
.type	fe_tobytes,@function
.align	2
fe_tobytes:
#else
.section	__TEXT,__text
.globl	_fe_tobytes
.p2align	2
_fe_tobytes:
#endif /* __APPLE__ */
	mov	x7, #19
	ldp	x2, x3, [x1]
	ldp	x4, x5, [x1, #16]
	/* Trial a + 19: every step overwrites x6, only the carries matter;
	 * after the last step x6 is the top word of the sum. */
	adds	x6, x2, x7
	adcs	x6, x3, xzr
	adcs	x6, x4, xzr
	adc	x6, x5, xzr
	/* x6 = 19 when bit 255 of the trial sum is set, otherwise 0 */
	and	x6, x7, x6, asr 63
	adds	x2, x2, x6
	adcs	x3, x3, xzr
	adcs	x4, x4, xzr
	adc	x5, x5, xzr
	/* Discard bit 255 */
	and	x5, x5, #0x7fffffffffffffff
	stp	x2, x3, [x0]
	stp	x4, x5, [x0, #16]
	ret
#ifndef __APPLE__
	.size	fe_tobytes,.-fe_tobytes
#endif /* __APPLE__ */
/* fe_1: store the value 1 into the four 64-bit words at x0.
 *
 *   x0 - destination: word 0 becomes 1, words 1-3 become 0
 *
 * Scratch register: x1. Flags are not modified.
 */
#ifndef __APPLE__
.text
.globl	fe_1
.type	fe_1,@function
.align	2
fe_1:
#else
.section	__TEXT,__text
.globl	_fe_1
.p2align	2
_fe_1:
#endif /* __APPLE__ */
	/* Upper two words are zero */
	stp	xzr, xzr, [x0, #16]
	/* Lower two words are 1, 0 */
	mov	x1, #1
	stp	x1, xzr, [x0]
	ret
#ifndef __APPLE__
	.size	fe_1,.-fe_1
#endif /* __APPLE__ */
/* fe_0: store zero into the four 64-bit words at x0.
 *
 *   x0 - destination: all 32 bytes become 0
 *
 * Uses no scratch registers and does not modify the flags.
 */
#ifndef __APPLE__
.text
.globl	fe_0
.type	fe_0,@function
.align	2
fe_0:
#else
.section	__TEXT,__text
.globl	_fe_0
.p2align	2
_fe_0:
#endif /* __APPLE__ */
	/* Clear bytes 16-31, then bytes 0-15 */
	stp	xzr, xzr, [x0, #16]
	stp	xzr, xzr, [x0]
	ret
#ifndef __APPLE__
	.size	fe_0,.-fe_0
#endif /* __APPLE__ */
/* fe_copy: copy four 64-bit words from x1 to x0.
 *
 *   x0 - destination (32 bytes)
 *   x1 - source (32 bytes)
 *
 * All four words are loaded before any is stored, so the two buffers may
 * overlap. Scratch registers: x2-x5. Flags are not modified.
 */
#ifndef __APPLE__
.text
.globl	fe_copy
.type	fe_copy,@function
.align	2
fe_copy:
#else
.section	__TEXT,__text
.globl	_fe_copy
.p2align	2
_fe_copy:
#endif /* __APPLE__ */
	/* Load upper pair, then lower pair */
	ldp	x4, x5, [x1, #16]
	ldp	x2, x3, [x1]
	/* Store in the same order */
	stp	x4, x5, [x0, #16]
	stp	x2, x3, [x0]
	ret
#ifndef __APPLE__
	.size	fe_copy,.-fe_copy
#endif /* __APPLE__ */
/* fe_sub: r = a - b, folded back below 2^255 using 2^255 = 19 (mod 2^255 - 19).
 *
 *   x0 - r: four 64-bit words written
 *   x1 - a: four 64-bit words, least significant first
 *   x2 - b: four 64-bit words, least significant first
 *
 * The raw 256-bit difference is formed first. Its borrow and its bit 255 are
 * then combined into a small signed multiplier, and that multiple of 19 is
 * removed from the low 255 bits.
 * NOTE(review): the correction step looks like it expects both inputs to be
 * below 2^255 - confirm against the callers.
 *
 * Scratch registers: x3-x12. Flags are clobbered. There are no branches.
 */
#ifndef __APPLE__
.text
.globl	fe_sub
.type	fe_sub,@function
.align	2
fe_sub:
#else
.section	__TEXT,__text
.globl	_fe_sub
.p2align	2
_fe_sub:
#endif /* __APPLE__ */
	# Sub
	ldp	x3, x4, [x1]
	ldp	x5, x6, [x1, #16]
	ldp	x7, x8, [x2]
	ldp	x9, x10, [x2, #16]
	subs	x3, x3, x7
	sbcs	x4, x4, x8
	sbcs	x5, x5, x9
	sbcs	x6, x6, x10
	/* x11 = all ones when the 256-bit subtraction borrowed, otherwise 0 */
	csetm	x11, cc
	mov	x12, #-19
	#   Mask the modulus
	/* x11 = (x11 << 1) | (bit 255 of the raw difference) */
	extr	x11, x11, x6, #63
	/* x12 = x11 * -19, the amount to subtract from the low 255 bits */
	mul	x12, x11, x12
	#   Add modulus (if underflow)
	subs	x3, x3, x12
	sbcs	x4, x4, xzr
	/* Clear bit 255. AND leaves the flags alone, so the borrow chain
	 * continues through the next two instructions. */
	and	x6, x6, #0x7fffffffffffffff
	sbcs	x5, x5, xzr
	sbc	x6, x6, xzr
	stp	x3, x4, [x0]
	stp	x5, x6, [x0, #16]
	ret
#ifndef __APPLE__
	.size	fe_sub,.-fe_sub
#endif /* __APPLE__ */
/* fe_add: r = a + b, folded back below 2^255 using 2^255 = 19 (mod 2^255 - 19).
 *
 *   x0 - r: four 64-bit words written
 *   x1 - a: four 64-bit words, least significant first
 *   x2 - b: four 64-bit words, least significant first
 *
 * The raw 256-bit sum is formed first. Its carry-out and its bit 255 together
 * give the part of the sum at or above 2^255; 19 times that part is added to
 * the low 255 bits.
 *
 * Scratch registers: x3-x12. Flags are clobbered. There are no branches.
 */
#ifndef __APPLE__
.text
.globl	fe_add
.type	fe_add,@function
.align	2
fe_add:
#else
.section	__TEXT,__text
.globl	_fe_add
.p2align	2
_fe_add:
#endif /* __APPLE__ */
	# Add
	ldp	x3, x4, [x1]
	ldp	x5, x6, [x1, #16]
	ldp	x7, x8, [x2]
	ldp	x9, x10, [x2, #16]
	adds	x3, x3, x7
	adcs	x4, x4, x8
	adcs	x5, x5, x9
	adcs	x6, x6, x10
	/* x11 = carry out of the 256-bit addition (0 or 1) */
	cset	x11, cs
	mov	x12, #19
	#   Mask the modulus
	/* x11 = (carry << 1) | (bit 255 of the raw sum) */
	extr	x11, x11, x6, #63
	/* x12 = 19 * x11, the amount to fold into the low 255 bits */
	mul	x12, x11, x12
	#   Sub modulus (if overflow)
	adds	x3, x3, x12
	adcs	x4, x4, xzr
	/* Clear bit 255. AND leaves the flags alone, so the carry chain
	 * continues through the next two instructions. */
	and	x6, x6, #0x7fffffffffffffff
	adcs	x5, x5, xzr
	adc	x6, x6, xzr
	stp	x3, x4, [x0]
	stp	x5, x6, [x0, #16]
	ret
#ifndef __APPLE__
	.size	fe_add,.-fe_add
#endif /* __APPLE__ */
/* fe_neg: r = (2^255 - 19) - a.
 *
 *   x0 - r: four 64-bit words written
 *   x1 - a: four 64-bit words, least significant first
 *
 * The constant 2^255 - 19 is built in x6-x9 and a is subtracted from it
 * word by word. No further reduction is applied to the result.
 *
 * Scratch registers: x2-x9. Flags are clobbered.
 */
#ifndef __APPLE__
.text
.globl	fe_neg
.type	fe_neg,@function
.align	2
fe_neg:
#else
.section	__TEXT,__text
.globl	_fe_neg
.p2align	2
_fe_neg:
#endif /* __APPLE__ */
	ldp	x2, x3, [x1]
	ldp	x4, x5, [x1, #16]
	/* x9:x8:x7:x6 = 2^255 - 19 */
	mov	x6, #-19
	mov	x7, #-1
	mov	x8, #-1
	mov	x9, #0x7fffffffffffffff
	subs	x6, x6, x2
	sbcs	x7, x7, x3
	sbcs	x8, x8, x4
	sbc	x9, x9, x5
	stp	x6, x7, [x0]
	stp	x8, x9, [x0, #16]
	ret
#ifndef __APPLE__
	.size	fe_neg,.-fe_neg
#endif /* __APPLE__ */
/* fe_isnonzero: report whether the value at x0 is non-zero after one
 * conditional reduction by 2^255 - 19.
 *
 *   x0 - in:  pointer to four 64-bit words, least significant first
 *        out: 1 when the reduced value is non-zero, 0 when it is zero
 *
 * The reduction matches fe_tobytes: when bit 255 of a + 19 is set, 19 is
 * added to a, and bit 255 is then cleared. The four resulting words are
 * ORed together and the OR is collapsed to 0 or 1.
 *
 * The collapse matters because the raw OR is a 64-bit quantity. Under
 * AAPCS64 a caller that treats the return value as a 32-bit int looks only
 * at w0, so a value whose set bits all lie in bits 32..63 of the OR would
 * otherwise read as zero.
 * NOTE(review): the C prototype is not visible in this file - confirm the
 * declared return type. A 0/1 result is valid for either width.
 *
 * Scratch registers: x1-x6. Flags are clobbered. There are no branches.
 */
#ifndef __APPLE__
.text
.globl	fe_isnonzero
.type	fe_isnonzero,@function
.align	2
fe_isnonzero:
#else
.section	__TEXT,__text
.globl	_fe_isnonzero
.p2align	2
_fe_isnonzero:
#endif /* __APPLE__ */
	mov	x6, #19
	ldp	x1, x2, [x0]
	ldp	x3, x4, [x0, #16]
	/* Trial a + 19: only the carries and the final top word are kept */
	adds	x5, x1, x6
	adcs	x5, x2, xzr
	adcs	x5, x3, xzr
	adc	x5, x4, xzr
	/* x5 = 19 when bit 255 of the trial sum is set, otherwise 0 */
	and	x5, x6, x5, asr 63
	adds	x1, x1, x5
	adcs	x2, x2, xzr
	adcs	x3, x3, xzr
	adc	x4, x4, xzr
	and	x4, x4, #0x7fffffffffffffff
	/* OR all four reduced words together */
	orr	x0, x1, x2
	orr	x3, x3, x4
	orr	x0, x0, x3
	/* Collapse to 0/1 so the answer is carried in the low 32 bits too */
	cmp	x0, #0
	cset	x0, ne
	ret
#ifndef __APPLE__
	.size	fe_isnonzero,.-fe_isnonzero
#endif /* __APPLE__ */
/* fe_isnegative: return the low bit of the value at x0 as it would be after
 * one conditional reduction by 2^255 - 19.
 *
 *   x0 - in:  pointer to four 64-bit words, least significant first
 *        out: 0 or 1
 *
 * Bit 255 of a + 19 says whether the conditional reduction would subtract
 * 2^255 - 19. That modulus is odd, so subtracting it flips the low bit. The
 * result is therefore (a & 1) XOR (bit 255 of a + 19), computed without
 * performing the reduction.
 *
 * Scratch registers: x1-x6. Flags are clobbered. There are no branches.
 */
#ifndef __APPLE__
.text
.globl	fe_isnegative
.type	fe_isnegative,@function
.align	2
fe_isnegative:
#else
.section	__TEXT,__text
.globl	_fe_isnegative
.p2align	2
_fe_isnegative:
#endif /* __APPLE__ */
	mov	x6, #19
	ldp	x1, x2, [x0]
	ldp	x3, x4, [x0, #16]
	/* Trial a + 19: after the last step x5 is the top word of the sum */
	adds	x5, x1, x6
	adcs	x5, x2, xzr
	adcs	x5, x3, xzr
	adc	x5, x4, xzr
	/* Low bit of a, flipped when bit 255 of the trial sum is set */
	and	x0, x1, #1
	eor	x0, x0, x5, lsr 63
	ret
#ifndef __APPLE__
	.size	fe_isnegative,.-fe_isnegative
#endif /* __APPLE__ */
/* fe_cmov_table: pick one 96-byte table entry by signed index, without
 * branching on the index.
 *
 *   x0 - r:    destination, twelve 64-bit words (three groups of four)
 *   x1 - base: table of eight 96-byte entries; entry k (1..8) starts at
 *              byte offset (k - 1) * 96
 *   w2 - b:    signed 8-bit index; only the low byte is used
 *
 * Selection starts from the default groups (1, 1, 0). Every one of the
 * eight entries is loaded and compared against |b|; the one that matches
 * replaces the selection through csel. When b is negative the first two
 * groups are exchanged and the third group g is replaced by
 * (2^255 - 19) - g.
 *
 * x17 and x19-x28 are saved in the 128-byte frame and restored on exit.
 * x0-x16 and the flags are clobbered (x1 is advanced by 384 part-way through).
 */

/* Replace the current selection x4-x15 with the candidate held in
 * x16, x17, x19-x28 when the preceding cmp left EQ set. */
.macro FE_CMOV_TABLE_TAKE_IF_EQ
	csel	x4, x16, x4, eq
	csel	x5, x17, x5, eq
	csel	x6, x19, x6, eq
	csel	x7, x20, x7, eq
	csel	x8, x21, x8, eq
	csel	x9, x22, x9, eq
	csel	x10, x23, x10, eq
	csel	x11, x24, x11, eq
	csel	x12, x25, x12, eq
	csel	x13, x26, x13, eq
	csel	x14, x27, x14, eq
	csel	x15, x28, x15, eq
.endm

#ifndef __APPLE__
.text
.globl	fe_cmov_table
.type	fe_cmov_table,@function
.align	2
fe_cmov_table:
#else
.section	__TEXT,__text
.globl	_fe_cmov_table
.p2align	2
_fe_cmov_table:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-128]!
	mov	x29, sp
	stp	x17, x19, [x29, #40]
	stp	x20, x21, [x29, #56]
	stp	x22, x23, [x29, #72]
	stp	x24, x25, [x29, #88]
	stp	x26, x27, [x29, #104]
	str	x28, [x29, #120]
	/* Keep r for the final stores; x0 is reused for |b| */
	str	x0, [x29, #16]
	/* x2 = b sign-extended, x3 = 0 or -1 from its sign, x0 = |b| */
	sxtb	x2, w2
	sbfx	x3, x2, #7, #1
	eor	x0, x2, x3
	sub	x0, x0, x3
	/* Default selection: groups (1, 1, 0) */
	mov	x4, #1
	mov	x5, xzr
	mov	x6, xzr
	mov	x7, xzr
	mov	x8, #1
	mov	x9, xzr
	mov	x10, xzr
	mov	x11, xzr
	mov	x12, xzr
	mov	x13, xzr
	mov	x14, xzr
	mov	x15, xzr
	/* Entry 1 */
	cmp	x0, #1
	ldp	x16, x17, [x1]
	ldp	x19, x20, [x1, #16]
	ldp	x21, x22, [x1, #32]
	ldp	x23, x24, [x1, #48]
	ldp	x25, x26, [x1, #64]
	ldp	x27, x28, [x1, #80]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* Entry 2 */
	cmp	x0, #2
	ldp	x16, x17, [x1, #96]
	ldp	x19, x20, [x1, #112]
	ldp	x21, x22, [x1, #128]
	ldp	x23, x24, [x1, #144]
	ldp	x25, x26, [x1, #160]
	ldp	x27, x28, [x1, #176]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* Entry 3 */
	cmp	x0, #3
	ldp	x16, x17, [x1, #192]
	ldp	x19, x20, [x1, #208]
	ldp	x21, x22, [x1, #224]
	ldp	x23, x24, [x1, #240]
	ldp	x25, x26, [x1, #256]
	ldp	x27, x28, [x1, #272]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* Entry 4 */
	cmp	x0, #4
	ldp	x16, x17, [x1, #288]
	ldp	x19, x20, [x1, #304]
	ldp	x21, x22, [x1, #320]
	ldp	x23, x24, [x1, #336]
	ldp	x25, x26, [x1, #352]
	ldp	x27, x28, [x1, #368]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* Move the base forward by four entries (4 * 96 bytes) */
	add	x1, x1, #384
	/* Entry 5 */
	cmp	x0, #5
	ldp	x16, x17, [x1]
	ldp	x19, x20, [x1, #16]
	ldp	x21, x22, [x1, #32]
	ldp	x23, x24, [x1, #48]
	ldp	x25, x26, [x1, #64]
	ldp	x27, x28, [x1, #80]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* Entry 6 */
	cmp	x0, #6
	ldp	x16, x17, [x1, #96]
	ldp	x19, x20, [x1, #112]
	ldp	x21, x22, [x1, #128]
	ldp	x23, x24, [x1, #144]
	ldp	x25, x26, [x1, #160]
	ldp	x27, x28, [x1, #176]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* Entry 7 */
	cmp	x0, #7
	ldp	x16, x17, [x1, #192]
	ldp	x19, x20, [x1, #208]
	ldp	x21, x22, [x1, #224]
	ldp	x23, x24, [x1, #240]
	ldp	x25, x26, [x1, #256]
	ldp	x27, x28, [x1, #272]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* Entry 8 */
	cmp	x0, #8
	ldp	x16, x17, [x1, #288]
	ldp	x19, x20, [x1, #304]
	ldp	x21, x22, [x1, #320]
	ldp	x23, x24, [x1, #336]
	ldp	x25, x26, [x1, #352]
	ldp	x27, x28, [x1, #368]
	FE_CMOV_TABLE_TAKE_IF_EQ
	/* x20:x19:x17:x16 = (2^255 - 19) - third group */
	mov	x16, #-19
	mov	x17, #-1
	mov	x19, #-1
	mov	x20, #0x7fffffffffffffff
	subs	x16, x16, x12
	sbcs	x17, x17, x13
	sbcs	x19, x19, x14
	sbc	x20, x20, x15
	/* For b < 0: exchange groups one and two, take the negated third group */
	cmp	x2, #0
	mov	x3, x4
	csel	x4, x8, x4, lt
	csel	x8, x3, x8, lt
	mov	x3, x5
	csel	x5, x9, x5, lt
	csel	x9, x3, x9, lt
	mov	x3, x6
	csel	x6, x10, x6, lt
	csel	x10, x3, x10, lt
	mov	x3, x7
	csel	x7, x11, x7, lt
	csel	x11, x3, x11, lt
	csel	x12, x16, x12, lt
	csel	x13, x17, x13, lt
	csel	x14, x19, x14, lt
	csel	x15, x20, x15, lt
	/* Write the twelve selected words to r */
	ldr	x0, [x29, #16]
	stp	x4, x5, [x0]
	stp	x6, x7, [x0, #16]
	stp	x8, x9, [x0, #32]
	stp	x10, x11, [x0, #48]
	stp	x12, x13, [x0, #64]
	stp	x14, x15, [x0, #80]
	ldp	x17, x19, [x29, #40]
	ldp	x20, x21, [x29, #56]
	ldp	x22, x23, [x29, #72]
	ldp	x24, x25, [x29, #88]
	ldp	x26, x27, [x29, #104]
	ldr	x28, [x29, #120]
	ldp	x29, x30, [sp], #128
	ret
#ifndef __APPLE__
	.size	fe_cmov_table,.-fe_cmov_table
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
/* fe_mul: r = a * b, reduced below 2^255 using 2^255 = 19 (mod 2^255 - 19).
 *
 *   x0 - r: four 64-bit words written
 *   x1 - a: four 64-bit words, least significant first
 *   x2 - b: four 64-bit words, least significant first
 *
 * The full 512-bit product is accumulated in x6-x13 (x6 lowest). The top
 * word is folded first (times 38, with everything at or above bit 255 of
 * word 3 turned into a multiple of 19), then words 4-6 are folded (times 38)
 * into words 0-2. A last step adds 19 and clears bit 255 when bit 255 of the
 * result is set.
 *
 * x17 and x19-x22 are saved in the 64-byte frame and restored on exit.
 * x3-x16 and the flags are clobbered. x0, x1 and x2 are never written.
 */
#ifndef __APPLE__
.text
.globl	fe_mul
.type	fe_mul,@function
.align	2
fe_mul:
#else
.section	__TEXT,__text
.globl	_fe_mul
.p2align	2
_fe_mul:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-64]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #24]
	stp	x20, x21, [x29, #40]
	str	x22, [x29, #56]
	# Multiply
	/* A = x17:x16:x15:x14, B = x22:x21:x20:x19 */
	ldp	x14, x15, [x1]
	ldp	x16, x17, [x1, #16]
	ldp	x19, x20, [x2]
	ldp	x21, x22, [x2, #16]
	# A[0] * B[0]
	umulh	x7, x14, x19
	mul	x6, x14, x19
	# A[2] * B[0]
	umulh	x9, x16, x19
	mul	x8, x16, x19
	# A[1] * B[0]
	mul	x3, x15, x19
	adds	x7, x7, x3
	umulh	x4, x15, x19
	adcs	x8, x8, x4
	# A[1] * B[3]
	umulh	x11, x15, x22
	adc	x9, x9, xzr
	mul	x10, x15, x22
	# A[0] * B[1]
	mul	x3, x14, x20
	adds	x7, x7, x3
	umulh	x4, x14, x20
	adcs	x8, x8, x4
	# A[2] * B[1]
	mul	x3, x16, x20
	adcs	x9, x9, x3
	umulh	x4, x16, x20
	adcs	x10, x10, x4
	adc	x11, x11, xzr
	# A[1] * B[2]
	mul	x3, x15, x21
	adds	x9, x9, x3
	umulh	x4, x15, x21
	adcs	x10, x10, x4
	adcs	x11, x11, xzr
	adc	x12, xzr, xzr
	# A[0] * B[2]
	mul	x3, x14, x21
	adds	x8, x8, x3
	umulh	x4, x14, x21
	adcs	x9, x9, x4
	adcs	x10, x10, xzr
	adcs	x11, x11, xzr
	adc	x12, x12, xzr
	# A[1] * B[1]
	mul	x3, x15, x20
	adds	x8, x8, x3
	umulh	x4, x15, x20
	adcs	x9, x9, x4
	# A[3] * B[1]
	mul	x3, x17, x20
	adcs	x10, x10, x3
	umulh	x4, x17, x20
	adcs	x11, x11, x4
	adc	x12, x12, xzr
	# A[2] * B[2]
	mul	x3, x16, x21
	adds	x10, x10, x3
	umulh	x4, x16, x21
	adcs	x11, x11, x4
	# A[3] * B[3]
	mul	x3, x17, x22
	adcs	x12, x12, x3
	umulh	x13, x17, x22
	adc	x13, x13, xzr
	# A[0] * B[3]
	mul	x3, x14, x22
	adds	x9, x9, x3
	umulh	x4, x14, x22
	adcs	x10, x10, x4
	# A[2] * B[3]
	mul	x3, x16, x22
	adcs	x11, x11, x3
	umulh	x4, x16, x22
	adcs	x12, x12, x4
	adc	x13, x13, xzr
	# A[3] * B[0]
	mul	x3, x17, x19
	adds	x9, x9, x3
	umulh	x4, x17, x19
	adcs	x10, x10, x4
	# A[3] * B[2]
	mul	x3, x17, x21
	adcs	x11, x11, x3
	umulh	x4, x17, x21
	adcs	x12, x12, x4
	adc	x13, x13, xzr
	# Reduce
	/* Fold the top word: x5:x9 = x9 + 38 * x13 */
	mov	x3, #38
	mul	x4, x3, x13
	adds	x9, x9, x4
	umulh	x5, x3, x13
	adc	x5, x5, xzr
	mov	x3, #19
	/* x5 = everything at or above bit 255, then times 19 */
	extr	x5, x5, x9, #63
	mul	x5, x5, x3
	and	x9, x9, #0x7fffffffffffffff
	/* Fold words 4-6 (x10-x12) times 38 into words 0-2; the high halves
	 * of those products stay in x10-x12 for the next step */
	mov	x3, #38
	mul	x4, x3, x10
	adds	x6, x6, x4
	umulh	x10, x3, x10
	mul	x4, x3, x11
	adcs	x7, x7, x4
	umulh	x11, x3, x11
	mul	x4, x3, x12
	adcs	x8, x8, x4
	umulh	x12, x3, x12
	adc	x9, x9, xzr
	#  Add high product results in
	adds	x6, x6, x5
	adcs	x7, x7, x10
	adcs	x8, x8, x11
	adc	x9, x9, x12
	# Reduce if top bit set
	mov	x3, #19
	/* x4 = 19 when bit 255 is set, otherwise 0 */
	and	x4, x3, x9, asr 63
	adds	x6, x6, x4
	adcs	x7, x7, xzr
	/* AND leaves the flags alone, so the carry chain continues */
	and	x9, x9, #0x7fffffffffffffff
	adcs	x8, x8, xzr
	adc	x9, x9, xzr
	# Store
	stp	x6, x7, [x0]
	stp	x8, x9, [x0, #16]
	ldp	x17, x19, [x29, #24]
	ldp	x20, x21, [x29, #40]
	ldr	x22, [x29, #56]
	ldp	x29, x30, [sp], #0x40
	ret
#ifndef __APPLE__
	.size	fe_mul,.-fe_mul
#endif /* __APPLE__ */
/* fe_sq: r = a * a, reduced below 2^255 using 2^255 = 19 (mod 2^255 - 19).
 *
 *   x0 - r: four 64-bit words written
 *   x1 - a: four 64-bit words, least significant first
 *
 * The six cross products A[i] * A[j] (i < j) are summed in x6-x11 and
 * doubled, then the four squares A[i] * A[i] are added to give the 512-bit
 * result in x5-x12 (x5 lowest). Reduction follows the same steps as fe_mul:
 * fold the top word, fold words 4-6 times 38, then add 19 and clear bit 255
 * when bit 255 of the result is set.
 *
 * Leaf function with no stack frame. x2-x16 and the flags are clobbered.
 * x0 and x1 are never written.
 */
#ifndef __APPLE__
.text
.globl	fe_sq
.type	fe_sq,@function
.align	2
fe_sq:
#else
.section	__TEXT,__text
.globl	_fe_sq
.p2align	2
_fe_sq:
#endif /* __APPLE__ */
	# Square
	/* A = x16:x15:x14:x13 */
	ldp	x13, x14, [x1]
	ldp	x15, x16, [x1, #16]
	#  A[0] * A[1]
	umulh	x7, x13, x14
	mul	x6, x13, x14
	#  A[0] * A[3]
	umulh	x9, x13, x16
	mul	x8, x13, x16
	#  A[0] * A[2]
	mul	x2, x13, x15
	adds	x7, x7, x2
	umulh	x3, x13, x15
	adcs	x8, x8, x3
	#  A[1] * A[3]
	mul	x2, x14, x16
	adcs	x9, x9, x2
	umulh	x10, x14, x16
	adc	x10, x10, xzr
	#  A[1] * A[2]
	mul	x2, x14, x15
	adds	x8, x8, x2
	umulh	x3, x14, x15
	adcs	x9, x9, x3
	#  A[2] * A[3]
	mul	x2, x15, x16
	adcs	x10, x10, x2
	umulh	x11, x15, x16
	adc	x11, x11, xzr
	# Double
	/* Each cross product appears twice in the square; x12 takes the carry */
	adds	x6, x6, x6
	adcs	x7, x7, x7
	adcs	x8, x8, x8
	adcs	x9, x9, x9
	adcs	x10, x10, x10
	adcs	x11, x11, x11
	adc	x12, xzr, xzr
	#  A[0] * A[0]
	umulh	x3, x13, x13
	mul	x5, x13, x13
	#  A[1] * A[1]
	mul	x2, x14, x14
	adds	x6, x6, x3
	umulh	x3, x14, x14
	adcs	x7, x7, x2
	#  A[2] * A[2]
	mul	x2, x15, x15
	adcs	x8, x8, x3
	umulh	x3, x15, x15
	adcs	x9, x9, x2
	#  A[3] * A[3]
	mul	x2, x16, x16
	adcs	x10, x10, x3
	umulh	x3, x16, x16
	adcs	x11, x11, x2
	adc	x12, x12, x3
	# Reduce
	/* Fold the top word: x4:x8 = x8 + 38 * x12 */
	mov	x2, #38
	mul	x3, x2, x12
	adds	x8, x8, x3
	umulh	x4, x2, x12
	adc	x4, x4, xzr
	mov	x2, #19
	/* x4 = everything at or above bit 255, then times 19 */
	extr	x4, x4, x8, #63
	mul	x4, x4, x2
	and	x8, x8, #0x7fffffffffffffff
	/* Fold words 4-6 (x9-x11) times 38 into words 0-2 */
	mov	x2, #38
	mul	x3, x2, x9
	adds	x5, x5, x3
	umulh	x9, x2, x9
	mul	x3, x2, x10
	adcs	x6, x6, x3
	umulh	x10, x2, x10
	mul	x3, x2, x11
	adcs	x7, x7, x3
	umulh	x11, x2, x11
	adc	x8, x8, xzr
	#  Add high product results in
	adds	x5, x5, x4
	adcs	x6, x6, x9
	adcs	x7, x7, x10
	adc	x8, x8, x11
	# Reduce if top bit set
	mov	x2, #19
	/* x3 = 19 when bit 255 is set, otherwise 0 */
	and	x3, x2, x8, asr 63
	adds	x5, x5, x3
	adcs	x6, x6, xzr
	/* AND leaves the flags alone, so the carry chain continues */
	and	x8, x8, #0x7fffffffffffffff
	adcs	x7, x7, xzr
	adc	x8, x8, xzr
	# Store
	stp	x5, x6, [x0]
	stp	x7, x8, [x0, #16]
	ret
#ifndef __APPLE__
	.size	fe_sq,.-fe_sq
#endif /* __APPLE__ */
/* fe_invert: r = a^(2^255 - 21), using a fixed chain of squarings and
 * multiplications.
 *
 *   x0 - r: four 64-bit words written
 *   x1 - a: four 64-bit words, least significant first
 *
 * Frame layout (176 bytes, addressed from x29):
 *   #16  t0        #48  t1        #80  t2        #112 t3
 *   #144 saved r   #152 saved a   #160 saved x17, x20
 *
 * Single squarings and all multiplications call fe_sq / fe_mul. Runs of
 * repeated squarings are done inline on x6-x9 with x20 as the counter.
 *
 * Argument loads wrapped in "#ifndef NDEBUG" are only assembled when NDEBUG
 * is not defined. With NDEBUG defined the code depends on x0/x1 still
 * holding the value set for an earlier call: fe_sq and fe_mul never write
 * x0 or x1, and the inline squaring does not touch x0-x2.
 *
 * x17 and x20 are saved and restored. x0-x16 and the flags are clobbered.
 */

/* One squaring step on the value held in x6-x9 (x6 lowest), result back in
 * x6-x9. Unlike fe_sq, the final "add 19 when bit 255 is set" step is not
 * applied. Scratch: x3-x5, x10-x17. */
.macro FE_INVERT_SQR
	/* Cross products A[i] * A[j], i < j, summed in x11-x16 */
	umulh	x12, x6, x7
	mul	x11, x6, x7
	umulh	x14, x6, x9
	mul	x13, x6, x9
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	/* Double the cross products; x17 takes the carry */
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	/* Add the squares A[i] * A[i]; 512-bit result in x10-x17 */
	umulh	x4, x6, x6
	mul	x10, x6, x6
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	/* Fold the top word, then everything at or above bit 255, times 19 */
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	/* Fold words 4-6 times 38 into words 0-2 */
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	/* Add the high halves and leave the result in x6-x9 */
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
.endm

#ifndef __APPLE__
.text
.globl	fe_invert
.type	fe_invert,@function
.align	2
fe_invert:
#else
.section	__TEXT,__text
.globl	_fe_invert
.p2align	2
_fe_invert:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-176]!
	mov	x29, sp
	stp	x17, x20, [x29, #160]
	/* Remember r and a across the calls below */
	str	x0, [x29, #144]
	str	x1, [x29, #152]
	/* t0 = a^2 */
	add	x0, x29, #16
#ifndef NDEBUG
	ldr	x1, [x29, #152]
#endif /* !NDEBUG */
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* t1 = t0^2 = a^4 */
	add	x0, x29, #48
	add	x1, x29, #16
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* t1 = t1^2 = a^8 */
#ifndef NDEBUG
	add	x0, x29, #48
#endif /* !NDEBUG */
	add	x1, x29, #48
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* t1 = a * t1 = a^9 */
#ifndef NDEBUG
	add	x0, x29, #48
#endif /* !NDEBUG */
	ldr	x1, [x29, #152]
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t0 = t0 * t1 = a^11 */
	add	x0, x29, #16
	add	x1, x29, #16
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t2 = t0^2 = a^22 */
	add	x0, x29, #80
#ifndef NDEBUG
	add	x1, x29, #16
#endif /* !NDEBUG */
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* t1 = t1 * t2 = a^(2^5 - 1) */
	add	x0, x29, #48
	add	x1, x29, #48
	add	x2, x29, #80
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t2 = t1 squared 5 times */
	mov	x20, #5
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_invert1:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert1
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	/* t1 = t2 * t1 = a^(2^10 - 1) */
#ifndef NDEBUG
	add	x0, x29, #48
#endif /* !NDEBUG */
	add	x1, x29, #80
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t2 = t1 squared 10 times */
	mov	x20, #10
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_invert2:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert2
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	/* t2 = t2 * t1 = a^(2^20 - 1) */
	add	x0, x29, #80
#ifndef NDEBUG
	add	x1, x29, #80
#endif /* !NDEBUG */
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t3 = t2 squared 20 times */
	mov	x20, #20
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_fe_invert3:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert3
	stp	x6, x7, [x29, #112]
	stp	x8, x9, [x29, #128]
	/* t2 = t3 * t2 = a^(2^40 - 1) */
#ifndef NDEBUG
	add	x0, x29, #80
#endif /* !NDEBUG */
	add	x1, x29, #112
	add	x2, x29, #80
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t2 = t2 squared 10 times */
	mov	x20, #10
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_fe_invert4:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert4
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	/* t1 = t2 * t1 = a^(2^50 - 1) */
	add	x0, x29, #48
	add	x1, x29, #80
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t2 = t1 squared 50 times */
	mov	x20, #50
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_invert5:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert5
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	/* t2 = t2 * t1 = a^(2^100 - 1) */
	add	x0, x29, #80
#ifndef NDEBUG
	add	x1, x29, #80
#endif /* !NDEBUG */
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t3 = t2 squared 100 times */
	mov	x20, #100
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_fe_invert6:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert6
	stp	x6, x7, [x29, #112]
	stp	x8, x9, [x29, #128]
	/* t2 = t3 * t2 = a^(2^200 - 1) */
#ifndef NDEBUG
	add	x0, x29, #80
#endif /* !NDEBUG */
	add	x1, x29, #112
	add	x2, x29, #80
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t2 = t2 squared 50 times */
	mov	x20, #50
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_fe_invert7:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert7
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	/* t1 = t2 * t1 = a^(2^250 - 1) */
	add	x0, x29, #48
	add	x1, x29, #80
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* t1 = t1 squared 5 times = a^(2^255 - 32) */
	mov	x20, #5
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_invert8:
	FE_INVERT_SQR
	subs	x20, x20, #1
	bne	L_fe_invert8
	stp	x6, x7, [x29, #48]
	stp	x8, x9, [x29, #64]
	/* r = t1 * t0 = a^(2^255 - 21) */
	ldr	x0, [x29, #144]
	add	x1, x29, #48
	add	x2, x29, #16
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	ldp	x17, x20, [x29, #160]
	ldp	x29, x30, [sp], #176
	ret
#ifndef __APPLE__
	.size	fe_invert,.-fe_invert
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl	curve25519
.type	curve25519,@function
.align	2
curve25519:
#else
.section	__TEXT,__text
.globl	_curve25519
.p2align	2
_curve25519:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-288]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #200]
	stp	x20, x21, [x29, #216]
	stp	x22, x23, [x29, #232]
	stp	x24, x25, [x29, #248]
	stp	x26, x27, [x29, #264]
	str	x28, [x29, #280]
	mov	x23, xzr
	str	x0, [x29, #176]
	str	x2, [x29, #184]
	ldp	x6, x7, [x2]
	ldp	x8, x9, [x2, #16]
	mov	x10, #1
	mov	x11, xzr
	mov	x12, xzr
	mov	x13, xzr
	stp	x10, x11, [x0]
	stp	x12, x13, [x0, #16]
	# Set zero
	stp	xzr, xzr, [x29, #16]
	stp	xzr, xzr, [x29, #32]
	mov	x24, #0xfe
L_curve25519_bits:
	lsr	x3, x24, #6
	and	x4, x24, #63
	ldr	x5, [x1, x3, LSL 3]
	lsr	x5, x5, x4
	eor	x23, x23, x5
	# Conditional Swap
	subs	xzr, xzr, x23, lsl 63
	ldp	x25, x26, [x29, #16]
	ldp	x27, x28, [x29, #32]
	csel	x19, x25, x10, ne
	csel	x25, x10, x25, ne
	csel	x20, x26, x11, ne
	csel	x26, x11, x26, ne
	csel	x21, x27, x12, ne
	csel	x27, x12, x27, ne
	csel	x22, x28, x13, ne
	csel	x28, x13, x28, ne
	# Conditional Swap
	subs	xzr, xzr, x23, lsl 63
	ldp	x10, x11, [x0]
	ldp	x12, x13, [x0, #16]
	csel	x14, x10, x6, ne
	csel	x10, x6, x10, ne
	csel	x15, x11, x7, ne
	csel	x11, x7, x11, ne
	csel	x16, x12, x8, ne
	csel	x12, x8, x12, ne
	csel	x17, x13, x9, ne
	csel	x13, x9, x13, ne
	mov	x23, x5
	# Add
	adds	x6, x10, x25
	adcs	x7, x11, x26
	adcs	x8, x12, x27
	adcs	x9, x13, x28
	cset	x5, cs
	mov	x3, #19
	extr	x5, x5, x9, #63
	mul	x3, x5, x3
	#   Sub modulus (if overflow)
	adds	x6, x6, x3
	adcs	x7, x7, xzr
	and	x9, x9, #0x7fffffffffffffff
	adcs	x8, x8, xzr
	adc	x9, x9, xzr
	# Sub
	subs	x25, x10, x25
	sbcs	x26, x11, x26
	sbcs	x27, x12, x27
	sbcs	x28, x13, x28
	csetm	x5, cc
	mov	x3, #-19
	extr	x5, x5, x28, #63
	mul	x3, x5, x3
	#   Add modulus (if underflow)
	subs	x25, x25, x3
	sbcs	x26, x26, xzr
	and	x28, x28, #0x7fffffffffffffff
	sbcs	x27, x27, xzr
	sbc	x28, x28, xzr
	stp	x25, x26, [x29, #80]
	stp	x27, x28, [x29, #96]
	# Add
	adds	x10, x14, x19
	adcs	x11, x15, x20
	adcs	x12, x16, x21
	adcs	x13, x17, x22
	cset	x5, cs
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x3, x5, x3
	#   Sub modulus (if overflow)
	adds	x10, x10, x3
	adcs	x11, x11, xzr
	and	x13, x13, #0x7fffffffffffffff
	adcs	x12, x12, xzr
	adc	x13, x13, xzr
	# Sub
	subs	x14, x14, x19
	sbcs	x15, x15, x20
	sbcs	x16, x16, x21
	sbcs	x17, x17, x22
	csetm	x5, cc
	mov	x3, #-19
	extr	x5, x5, x17, #63
	mul	x3, x5, x3
	#   Add modulus (if underflow)
	subs	x14, x14, x3
	sbcs	x15, x15, xzr
	and	x17, x17, #0x7fffffffffffffff
	sbcs	x16, x16, xzr
	sbc	x17, x17, xzr
	# Multiply
	# A[0] * B[0]
	umulh	x20, x14, x6
	mul	x19, x14, x6
	# A[2] * B[0]
	umulh	x22, x16, x6
	mul	x21, x16, x6
	# A[1] * B[0]
	mul	x3, x15, x6
	adds	x20, x20, x3
	umulh	x4, x15, x6
	adcs	x21, x21, x4
	# A[1] * B[3]
	umulh	x26, x15, x9
	adc	x22, x22, xzr
	mul	x25, x15, x9
	# A[0] * B[1]
	mul	x3, x14, x7
	adds	x20, x20, x3
	umulh	x4, x14, x7
	adcs	x21, x21, x4
	# A[2] * B[1]
	mul	x3, x16, x7
	adcs	x22, x22, x3
	umulh	x4, x16, x7
	adcs	x25, x25, x4
	adc	x26, x26, xzr
	# A[1] * B[2]
	mul	x3, x15, x8
	adds	x22, x22, x3
	umulh	x4, x15, x8
	adcs	x25, x25, x4
	adcs	x26, x26, xzr
	adc	x27, xzr, xzr
	# A[0] * B[2]
	mul	x3, x14, x8
	adds	x21, x21, x3
	umulh	x4, x14, x8
	adcs	x22, x22, x4
	adcs	x25, x25, xzr
	adcs	x26, x26, xzr
	adc	x27, x27, xzr
	# A[1] * B[1]
	mul	x3, x15, x7
	adds	x21, x21, x3
	umulh	x4, x15, x7
	adcs	x22, x22, x4
	# A[3] * B[1]
	mul	x3, x17, x7
	adcs	x25, x25, x3
	umulh	x4, x17, x7
	adcs	x26, x26, x4
	adc	x27, x27, xzr
	# A[2] * B[2]
	mul	x3, x16, x8
	adds	x25, x25, x3
	umulh	x4, x16, x8
	adcs	x26, x26, x4
	# A[3] * B[3]
	mul	x3, x17, x9
	adcs	x27, x27, x3
	umulh	x28, x17, x9
	adc	x28, x28, xzr
	# A[0] * B[3]
	mul	x3, x14, x9
	adds	x22, x22, x3
	umulh	x4, x14, x9
	adcs	x25, x25, x4
	# A[2] * B[3]
	mul	x3, x16, x9
	adcs	x26, x26, x3
	umulh	x4, x16, x9
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# A[3] * B[0]
	mul	x3, x17, x6
	adds	x22, x22, x3
	umulh	x4, x17, x6
	adcs	x25, x25, x4
	# A[3] * B[2]
	mul	x3, x17, x8
	adcs	x26, x26, x3
	umulh	x4, x17, x8
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x28
	adds	x22, x22, x4
	umulh	x5, x3, x28
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x22, #63
	mul	x5, x5, x3
	and	x22, x22, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x25
	adds	x19, x19, x4
	umulh	x25, x3, x25
	mul	x4, x3, x26
	adcs	x20, x20, x4
	umulh	x26, x3, x26
	mul	x4, x3, x27
	adcs	x21, x21, x4
	umulh	x27, x3, x27
	adc	x22, x22, xzr
	#  Add high product results in
	adds	x19, x19, x5
	adcs	x20, x20, x25
	adcs	x21, x21, x26
	adc	x22, x22, x27
	# Store
	stp	x19, x20, [x29, #48]
	stp	x21, x22, [x29, #64]
	# Multiply
	ldp	x25, x26, [x29, #80]
	ldp	x27, x28, [x29, #96]
	# A[0] * B[0]
	umulh	x20, x10, x25
	mul	x19, x10, x25
	# A[2] * B[0]
	umulh	x22, x12, x25
	mul	x21, x12, x25
	# A[1] * B[0]
	mul	x3, x11, x25
	adds	x20, x20, x3
	umulh	x4, x11, x25
	adcs	x21, x21, x4
	# A[1] * B[3]
	umulh	x15, x11, x28
	adc	x22, x22, xzr
	mul	x14, x11, x28
	# A[0] * B[1]
	mul	x3, x10, x26
	adds	x20, x20, x3
	umulh	x4, x10, x26
	adcs	x21, x21, x4
	# A[2] * B[1]
	mul	x3, x12, x26
	adcs	x22, x22, x3
	umulh	x4, x12, x26
	adcs	x14, x14, x4
	adc	x15, x15, xzr
	# A[1] * B[2]
	mul	x3, x11, x27
	adds	x22, x22, x3
	umulh	x4, x11, x27
	adcs	x14, x14, x4
	adcs	x15, x15, xzr
	adc	x16, xzr, xzr
	# A[0] * B[2]
	mul	x3, x10, x27
	adds	x21, x21, x3
	umulh	x4, x10, x27
	adcs	x22, x22, x4
	adcs	x14, x14, xzr
	adcs	x15, x15, xzr
	adc	x16, x16, xzr
	# A[1] * B[1]
	mul	x3, x11, x26
	adds	x21, x21, x3
	umulh	x4, x11, x26
	adcs	x22, x22, x4
	# A[3] * B[1]
	mul	x3, x13, x26
	adcs	x14, x14, x3
	umulh	x4, x13, x26
	adcs	x15, x15, x4
	adc	x16, x16, xzr
	# A[2] * B[2]
	mul	x3, x12, x27
	adds	x14, x14, x3
	umulh	x4, x12, x27
	adcs	x15, x15, x4
	# A[3] * B[3]
	mul	x3, x13, x28
	adcs	x16, x16, x3
	umulh	x17, x13, x28
	adc	x17, x17, xzr
	# A[0] * B[3]
	mul	x3, x10, x28
	adds	x22, x22, x3
	umulh	x4, x10, x28
	adcs	x14, x14, x4
	# A[2] * B[3]
	mul	x3, x12, x28
	adcs	x15, x15, x3
	umulh	x4, x12, x28
	adcs	x16, x16, x4
	adc	x17, x17, xzr
	# A[3] * B[0]
	mul	x3, x13, x25
	adds	x22, x22, x3
	umulh	x4, x13, x25
	adcs	x14, x14, x4
	# A[3] * B[2]
	mul	x3, x13, x27
	adcs	x15, x15, x3
	umulh	x4, x13, x27
	adcs	x16, x16, x4
	adc	x17, x17, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x22, x22, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x22, #63
	mul	x5, x5, x3
	and	x22, x22, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x19, x19, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x20, x20, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x21, x21, x4
	umulh	x16, x3, x16
	adc	x22, x22, xzr
	#  Add high product results in
	adds	x19, x19, x5
	adcs	x20, x20, x14
	adcs	x21, x21, x15
	adc	x22, x22, x16
	# Square
	#  A[0] * A[1]
	umulh	x12, x25, x26
	mul	x11, x25, x26
	#  A[0] * A[3]
	umulh	x14, x25, x28
	mul	x13, x25, x28
	#  A[0] * A[2]
	mul	x3, x25, x27
	adds	x12, x12, x3
	umulh	x4, x25, x27
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x26, x28
	adcs	x14, x14, x3
	umulh	x15, x26, x28
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x26, x27
	adds	x13, x13, x3
	umulh	x4, x26, x27
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x27, x28
	adcs	x15, x15, x3
	umulh	x16, x27, x28
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x25, x25
	mul	x10, x25, x25
	#  A[1] * A[1]
	mul	x3, x26, x26
	adds	x11, x11, x4
	umulh	x4, x26, x26
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x27, x27
	adcs	x13, x13, x4
	umulh	x4, x27, x27
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x28, x28
	adcs	x15, x15, x4
	umulh	x4, x28, x28
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x10, x10, x5
	adcs	x11, x11, x14
	adcs	x12, x12, x15
	adc	x13, x13, x16
	# Square
	#  A[0] * A[1]
	umulh	x16, x6, x7
	mul	x15, x6, x7
	#  A[0] * A[3]
	umulh	x25, x6, x9
	mul	x17, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x16, x16, x3
	umulh	x4, x6, x8
	adcs	x17, x17, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x25, x25, x3
	umulh	x26, x7, x9
	adc	x26, x26, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x17, x17, x3
	umulh	x4, x7, x8
	adcs	x25, x25, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x26, x26, x3
	umulh	x27, x8, x9
	adc	x27, x27, xzr
	# Double
	adds	x15, x15, x15
	adcs	x16, x16, x16
	adcs	x17, x17, x17
	adcs	x25, x25, x25
	adcs	x26, x26, x26
	adcs	x27, x27, x27
	adc	x28, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x14, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x15, x15, x4
	umulh	x4, x7, x7
	adcs	x16, x16, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x17, x17, x4
	umulh	x4, x8, x8
	adcs	x25, x25, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x26, x26, x4
	umulh	x4, x9, x9
	adcs	x27, x27, x3
	adc	x28, x28, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x28
	adds	x17, x17, x4
	umulh	x5, x3, x28
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x25
	adds	x14, x14, x4
	umulh	x25, x3, x25
	mul	x4, x3, x26
	adcs	x15, x15, x4
	umulh	x26, x3, x26
	mul	x4, x3, x27
	adcs	x16, x16, x4
	umulh	x27, x3, x27
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x25
	adcs	x16, x16, x26
	adc	x17, x17, x27
	# Multiply
	# A[0] * B[0]
	umulh	x7, x14, x10
	mul	x6, x14, x10
	# A[2] * B[0]
	umulh	x9, x16, x10
	mul	x8, x16, x10
	# A[1] * B[0]
	mul	x3, x15, x10
	adds	x7, x7, x3
	umulh	x4, x15, x10
	adcs	x8, x8, x4
	# A[1] * B[3]
	umulh	x26, x15, x13
	adc	x9, x9, xzr
	mul	x25, x15, x13
	# A[0] * B[1]
	mul	x3, x14, x11
	adds	x7, x7, x3
	umulh	x4, x14, x11
	adcs	x8, x8, x4
	# A[2] * B[1]
	mul	x3, x16, x11
	adcs	x9, x9, x3
	umulh	x4, x16, x11
	adcs	x25, x25, x4
	adc	x26, x26, xzr
	# A[1] * B[2]
	mul	x3, x15, x12
	adds	x9, x9, x3
	umulh	x4, x15, x12
	adcs	x25, x25, x4
	adcs	x26, x26, xzr
	adc	x27, xzr, xzr
	# A[0] * B[2]
	mul	x3, x14, x12
	adds	x8, x8, x3
	umulh	x4, x14, x12
	adcs	x9, x9, x4
	adcs	x25, x25, xzr
	adcs	x26, x26, xzr
	adc	x27, x27, xzr
	# A[1] * B[1]
	mul	x3, x15, x11
	adds	x8, x8, x3
	umulh	x4, x15, x11
	adcs	x9, x9, x4
	# A[3] * B[1]
	mul	x3, x17, x11
	adcs	x25, x25, x3
	umulh	x4, x17, x11
	adcs	x26, x26, x4
	adc	x27, x27, xzr
	# A[2] * B[2]
	mul	x3, x16, x12
	adds	x25, x25, x3
	umulh	x4, x16, x12
	adcs	x26, x26, x4
	# A[3] * B[3]
	mul	x3, x17, x13
	adcs	x27, x27, x3
	umulh	x28, x17, x13
	adc	x28, x28, xzr
	# A[0] * B[3]
	mul	x3, x14, x13
	adds	x9, x9, x3
	umulh	x4, x14, x13
	adcs	x25, x25, x4
	# A[2] * B[3]
	mul	x3, x16, x13
	adcs	x26, x26, x3
	umulh	x4, x16, x13
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# A[3] * B[0]
	mul	x3, x17, x10
	adds	x9, x9, x3
	umulh	x4, x17, x10
	adcs	x25, x25, x4
	# A[3] * B[2]
	mul	x3, x17, x12
	adcs	x26, x26, x3
	umulh	x4, x17, x12
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x28
	adds	x9, x9, x4
	umulh	x5, x3, x28
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x9, #63
	mul	x5, x5, x3
	and	x9, x9, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x25
	adds	x6, x6, x4
	umulh	x25, x3, x25
	mul	x4, x3, x26
	adcs	x7, x7, x4
	umulh	x26, x3, x26
	mul	x4, x3, x27
	adcs	x8, x8, x4
	umulh	x27, x3, x27
	adc	x9, x9, xzr
	#  Add high product results in
	adds	x6, x6, x5
	adcs	x7, x7, x25
	adcs	x8, x8, x26
	adc	x9, x9, x27
	# Store
	stp	x6, x7, [x0]
	stp	x8, x9, [x0, #16]
	# Sub
	subs	x14, x14, x10
	sbcs	x15, x15, x11
	sbcs	x16, x16, x12
	sbcs	x17, x17, x13
	csetm	x5, cc
	mov	x3, #-19
	#   Fold borrow and bit 255 into a correction (multiple of 19)
	extr	x5, x5, x17, #63
	mul	x3, x5, x3
	#   Add modulus (if underflow)
	subs	x14, x14, x3
	sbcs	x15, x15, xzr
	and	x17, x17, #0x7fffffffffffffff
	sbcs	x16, x16, xzr
	sbc	x17, x17, xzr
	# Multiply by 121666
	mov	x5, #0xdb42
	movk	x5, #1, lsl 16
	mul	x6, x14, x5
	umulh	x7, x14, x5
	mul	x3, x15, x5
	umulh	x8, x15, x5
	adds	x7, x7, x3
	adc	x8, x8, xzr
	mul	x3, x16, x5
	umulh	x9, x16, x5
	adds	x8, x8, x3
	adc	x9, x9, xzr
	mul	x3, x17, x5
	umulh	x4, x17, x5
	adds	x9, x9, x3
	adc	x4, x4, xzr
	mov	x5, #19
	extr	x4, x4, x9, #63
	mul	x4, x4, x5
	adds	x6, x6, x4
	adcs	x7, x7, xzr
	and	x9, x9, #0x7fffffffffffffff
	adcs	x8, x8, xzr
	adc	x9, x9, xzr
	# Add
	adds	x10, x10, x6
	adcs	x11, x11, x7
	adcs	x12, x12, x8
	adcs	x13, x13, x9
	cset	x5, cs
	mov	x3, #19
	#   Fold carry and bit 255 into a correction (multiple of 19)
	extr	x5, x5, x13, #63
	mul	x3, x5, x3
	#   Sub modulus (if overflow)
	adds	x10, x10, x3
	adcs	x11, x11, xzr
	and	x13, x13, #0x7fffffffffffffff
	adcs	x12, x12, xzr
	adc	x13, x13, xzr
	# Multiply
	# A[0] * B[0]
	umulh	x7, x14, x10
	mul	x6, x14, x10
	# A[2] * B[0]
	umulh	x9, x16, x10
	mul	x8, x16, x10
	# A[1] * B[0]
	mul	x3, x15, x10
	adds	x7, x7, x3
	umulh	x4, x15, x10
	adcs	x8, x8, x4
	# A[1] * B[3]
	umulh	x26, x15, x13
	adc	x9, x9, xzr
	mul	x25, x15, x13
	# A[0] * B[1]
	mul	x3, x14, x11
	adds	x7, x7, x3
	umulh	x4, x14, x11
	adcs	x8, x8, x4
	# A[2] * B[1]
	mul	x3, x16, x11
	adcs	x9, x9, x3
	umulh	x4, x16, x11
	adcs	x25, x25, x4
	adc	x26, x26, xzr
	# A[1] * B[2]
	mul	x3, x15, x12
	adds	x9, x9, x3
	umulh	x4, x15, x12
	adcs	x25, x25, x4
	adcs	x26, x26, xzr
	adc	x27, xzr, xzr
	# A[0] * B[2]
	mul	x3, x14, x12
	adds	x8, x8, x3
	umulh	x4, x14, x12
	adcs	x9, x9, x4
	adcs	x25, x25, xzr
	adcs	x26, x26, xzr
	adc	x27, x27, xzr
	# A[1] * B[1]
	mul	x3, x15, x11
	adds	x8, x8, x3
	umulh	x4, x15, x11
	adcs	x9, x9, x4
	# A[3] * B[1]
	mul	x3, x17, x11
	adcs	x25, x25, x3
	umulh	x4, x17, x11
	adcs	x26, x26, x4
	adc	x27, x27, xzr
	# A[2] * B[2]
	mul	x3, x16, x12
	adds	x25, x25, x3
	umulh	x4, x16, x12
	adcs	x26, x26, x4
	# A[3] * B[3]
	mul	x3, x17, x13
	adcs	x27, x27, x3
	umulh	x28, x17, x13
	adc	x28, x28, xzr
	# A[0] * B[3]
	mul	x3, x14, x13
	adds	x9, x9, x3
	umulh	x4, x14, x13
	adcs	x25, x25, x4
	# A[2] * B[3]
	mul	x3, x16, x13
	adcs	x26, x26, x3
	umulh	x4, x16, x13
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# A[3] * B[0]
	mul	x3, x17, x10
	adds	x9, x9, x3
	umulh	x4, x17, x10
	adcs	x25, x25, x4
	# A[3] * B[2]
	mul	x3, x17, x12
	adcs	x26, x26, x3
	umulh	x4, x17, x12
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x28
	adds	x9, x9, x4
	umulh	x5, x3, x28
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x9, #63
	mul	x5, x5, x3
	and	x9, x9, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x25
	adds	x6, x6, x4
	umulh	x25, x3, x25
	mul	x4, x3, x26
	adcs	x7, x7, x4
	umulh	x26, x3, x26
	mul	x4, x3, x27
	adcs	x8, x8, x4
	umulh	x27, x3, x27
	adc	x9, x9, xzr
	#  Add high product results in
	adds	x6, x6, x5
	adcs	x7, x7, x25
	adcs	x8, x8, x26
	adc	x9, x9, x27
	# Store
	stp	x6, x7, [x29, #16]
	stp	x8, x9, [x29, #32]
	# Add
	ldp	x25, x26, [x29, #48]
	ldp	x27, x28, [x29, #64]
	adds	x10, x25, x19
	adcs	x11, x26, x20
	adcs	x12, x27, x21
	adcs	x13, x28, x22
	cset	x5, cs
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x3, x5, x3
	#   Sub modulus (if overflow)
	adds	x10, x10, x3
	adcs	x11, x11, xzr
	and	x13, x13, #0x7fffffffffffffff
	adcs	x12, x12, xzr
	adc	x13, x13, xzr
	# Sub
	subs	x19, x25, x19
	sbcs	x20, x26, x20
	sbcs	x21, x27, x21
	sbcs	x22, x28, x22
	csetm	x5, cc
	mov	x3, #-19
	extr	x5, x5, x22, #63
	mul	x3, x5, x3
	#   Add modulus (if underflow)
	subs	x19, x19, x3
	sbcs	x20, x20, xzr
	and	x22, x22, #0x7fffffffffffffff
	sbcs	x21, x21, xzr
	sbc	x22, x22, xzr
	# Square
	#  A[0] * A[1]
	umulh	x8, x10, x11
	mul	x7, x10, x11
	#  A[0] * A[3]
	umulh	x25, x10, x13
	mul	x9, x10, x13
	#  A[0] * A[2]
	mul	x3, x10, x12
	adds	x8, x8, x3
	umulh	x4, x10, x12
	adcs	x9, x9, x4
	#  A[1] * A[3]
	mul	x3, x11, x13
	adcs	x25, x25, x3
	umulh	x26, x11, x13
	adc	x26, x26, xzr
	#  A[1] * A[2]
	mul	x3, x11, x12
	adds	x9, x9, x3
	umulh	x4, x11, x12
	adcs	x25, x25, x4
	#  A[2] * A[3]
	mul	x3, x12, x13
	adcs	x26, x26, x3
	umulh	x27, x12, x13
	adc	x27, x27, xzr
	# Double
	adds	x7, x7, x7
	adcs	x8, x8, x8
	adcs	x9, x9, x9
	adcs	x25, x25, x25
	adcs	x26, x26, x26
	adcs	x27, x27, x27
	adc	x28, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x10, x10
	mul	x6, x10, x10
	#  A[1] * A[1]
	mul	x3, x11, x11
	adds	x7, x7, x4
	umulh	x4, x11, x11
	adcs	x8, x8, x3
	#  A[2] * A[2]
	mul	x3, x12, x12
	adcs	x9, x9, x4
	umulh	x4, x12, x12
	adcs	x25, x25, x3
	#  A[3] * A[3]
	mul	x3, x13, x13
	adcs	x26, x26, x4
	umulh	x4, x13, x13
	adcs	x27, x27, x3
	adc	x28, x28, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x28
	adds	x9, x9, x4
	umulh	x5, x3, x28
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x9, #63
	mul	x5, x5, x3
	and	x9, x9, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x25
	adds	x6, x6, x4
	umulh	x25, x3, x25
	mul	x4, x3, x26
	adcs	x7, x7, x4
	umulh	x26, x3, x26
	mul	x4, x3, x27
	adcs	x8, x8, x4
	umulh	x27, x3, x27
	adc	x9, x9, xzr
	#  Add high product results in
	adds	x6, x6, x5
	adcs	x7, x7, x25
	adcs	x8, x8, x26
	adc	x9, x9, x27
	# Square
	#  A[0] * A[1]
	umulh	x16, x19, x20
	mul	x15, x19, x20
	#  A[0] * A[3]
	umulh	x25, x19, x22
	mul	x17, x19, x22
	#  A[0] * A[2]
	mul	x3, x19, x21
	adds	x16, x16, x3
	umulh	x4, x19, x21
	adcs	x17, x17, x4
	#  A[1] * A[3]
	mul	x3, x20, x22
	adcs	x25, x25, x3
	umulh	x26, x20, x22
	adc	x26, x26, xzr
	#  A[1] * A[2]
	mul	x3, x20, x21
	adds	x17, x17, x3
	umulh	x4, x20, x21
	adcs	x25, x25, x4
	#  A[2] * A[3]
	mul	x3, x21, x22
	adcs	x26, x26, x3
	umulh	x27, x21, x22
	adc	x27, x27, xzr
	# Double
	adds	x15, x15, x15
	adcs	x16, x16, x16
	adcs	x17, x17, x17
	adcs	x25, x25, x25
	adcs	x26, x26, x26
	adcs	x27, x27, x27
	adc	x28, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x19, x19
	mul	x14, x19, x19
	#  A[1] * A[1]
	mul	x3, x20, x20
	adds	x15, x15, x4
	umulh	x4, x20, x20
	adcs	x16, x16, x3
	#  A[2] * A[2]
	mul	x3, x21, x21
	adcs	x17, x17, x4
	umulh	x4, x21, x21
	adcs	x25, x25, x3
	#  A[3] * A[3]
	mul	x3, x22, x22
	adcs	x26, x26, x4
	umulh	x4, x22, x22
	adcs	x27, x27, x3
	adc	x28, x28, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x28
	adds	x17, x17, x4
	umulh	x5, x3, x28
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x25
	adds	x14, x14, x4
	umulh	x25, x3, x25
	mul	x4, x3, x26
	adcs	x15, x15, x4
	umulh	x26, x3, x26
	mul	x4, x3, x27
	adcs	x16, x16, x4
	umulh	x27, x3, x27
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x25
	adcs	x16, x16, x26
	adc	x17, x17, x27
	# Multiply
	ldp	x19, x20, [x2]
	ldp	x21, x22, [x2, #16]
	# A[0] * B[0]
	umulh	x11, x19, x14
	mul	x10, x19, x14
	# A[2] * B[0]
	umulh	x13, x21, x14
	mul	x12, x21, x14
	# A[1] * B[0]
	mul	x3, x20, x14
	adds	x11, x11, x3
	umulh	x4, x20, x14
	adcs	x12, x12, x4
	# A[1] * B[3]
	umulh	x26, x20, x17
	adc	x13, x13, xzr
	mul	x25, x20, x17
	# A[0] * B[1]
	mul	x3, x19, x15
	adds	x11, x11, x3
	umulh	x4, x19, x15
	adcs	x12, x12, x4
	# A[2] * B[1]
	mul	x3, x21, x15
	adcs	x13, x13, x3
	umulh	x4, x21, x15
	adcs	x25, x25, x4
	adc	x26, x26, xzr
	# A[1] * B[2]
	mul	x3, x20, x16
	adds	x13, x13, x3
	umulh	x4, x20, x16
	adcs	x25, x25, x4
	adcs	x26, x26, xzr
	adc	x27, xzr, xzr
	# A[0] * B[2]
	mul	x3, x19, x16
	adds	x12, x12, x3
	umulh	x4, x19, x16
	adcs	x13, x13, x4
	adcs	x25, x25, xzr
	adcs	x26, x26, xzr
	adc	x27, x27, xzr
	# A[1] * B[1]
	mul	x3, x20, x15
	adds	x12, x12, x3
	umulh	x4, x20, x15
	adcs	x13, x13, x4
	# A[3] * B[1]
	mul	x3, x22, x15
	adcs	x25, x25, x3
	umulh	x4, x22, x15
	adcs	x26, x26, x4
	adc	x27, x27, xzr
	# A[2] * B[2]
	mul	x3, x21, x16
	adds	x25, x25, x3
	umulh	x4, x21, x16
	adcs	x26, x26, x4
	# A[3] * B[3]
	mul	x3, x22, x17
	adcs	x27, x27, x3
	umulh	x28, x22, x17
	adc	x28, x28, xzr
	# A[0] * B[3]
	mul	x3, x19, x17
	adds	x13, x13, x3
	umulh	x4, x19, x17
	adcs	x25, x25, x4
	# A[2] * B[3]
	mul	x3, x21, x17
	adcs	x26, x26, x3
	umulh	x4, x21, x17
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# A[3] * B[0]
	mul	x3, x22, x14
	adds	x13, x13, x3
	umulh	x4, x22, x14
	adcs	x25, x25, x4
	# A[3] * B[2]
	mul	x3, x22, x16
	adcs	x26, x26, x3
	umulh	x4, x22, x16
	adcs	x27, x27, x4
	adc	x28, x28, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x28
	adds	x13, x13, x4
	umulh	x5, x3, x28
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x25
	adds	x10, x10, x4
	umulh	x25, x3, x25
	mul	x4, x3, x26
	adcs	x11, x11, x4
	umulh	x26, x3, x26
	mul	x4, x3, x27
	adcs	x12, x12, x4
	umulh	x27, x3, x27
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x10, x10, x5
	adcs	x11, x11, x25
	adcs	x12, x12, x26
	adc	x13, x13, x27
	subs	x24, x24, #1
	bge	L_curve25519_bits
	# Invert
	add	x0, x29, #48
	add	x1, x29, #16
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	add	x0, x29, #0x50
	add	x1, x29, #48
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
	add	x0, x29, #0x50
#endif /* !NDEBUG */
	add	x1, x29, #0x50
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
#ifndef NDEBUG
	add	x0, x29, #0x50
#endif /* !NDEBUG */
	add	x1, x29, #16
	add	x2, x29, #0x50
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	add	x0, x29, #48
	add	x1, x29, #48
	add	x2, x29, #0x50
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	add	x0, x29, #0x70
#ifndef NDEBUG
	add	x1, x29, #48
#endif /* !NDEBUG */
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	add	x0, x29, #0x50
	add	x1, x29, #0x50
	add	x2, x29, #0x70
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 5 times
	mov	x24, #5
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_curve25519_inv_1:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_1
	# Store
	stp	x6, x7, [x29, #112]
	stp	x8, x9, [x29, #128]
#ifndef NDEBUG
	add	x0, x29, #0x50
#endif /* !NDEBUG */
	add	x1, x29, #0x70
	add	x2, x29, #0x50
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 10 times
	mov	x24, #10
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_curve25519_inv_2:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_2
	# Store
	stp	x6, x7, [x29, #112]
	stp	x8, x9, [x29, #128]
	add	x0, x29, #0x70
#ifndef NDEBUG
	add	x1, x29, #0x70
#endif /* !NDEBUG */
	add	x2, x29, #0x50
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 20 times
	mov	x24, #20
	ldp	x6, x7, [x29, #112]
	ldp	x8, x9, [x29, #128]
L_curve25519_inv_3:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_3
	# Store
	stp	x6, x7, [x29, #144]
	stp	x8, x9, [x29, #160]
#ifndef NDEBUG
	add	x0, x29, #0x70
#endif /* !NDEBUG */
	add	x1, x29, #0x90
	add	x2, x29, #0x70
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 10 times
	mov	x24, #10
	ldp	x6, x7, [x29, #112]
	ldp	x8, x9, [x29, #128]
L_curve25519_inv_4:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_4
	# Store
	stp	x6, x7, [x29, #112]
	stp	x8, x9, [x29, #128]
	add	x0, x29, #0x50
	add	x1, x29, #0x70
	add	x2, x29, #0x50
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 50 times
	mov	x24, #50
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_curve25519_inv_5:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_5
	# Store
	stp	x6, x7, [x29, #112]
	stp	x8, x9, [x29, #128]
	add	x0, x29, #0x70
#ifndef NDEBUG
	add	x1, x29, #0x70
#endif /* !NDEBUG */
	add	x2, x29, #0x50
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 100 times
	mov	x24, #0x64
	ldp	x6, x7, [x29, #112]
	ldp	x8, x9, [x29, #128]
L_curve25519_inv_6:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_6
	# Store
	stp	x6, x7, [x29, #144]
	stp	x8, x9, [x29, #160]
#ifndef NDEBUG
	add	x0, x29, #0x70
#endif /* !NDEBUG */
	add	x1, x29, #0x90
	add	x2, x29, #0x70
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 50 times
	mov	x24, #50
	ldp	x6, x7, [x29, #112]
	ldp	x8, x9, [x29, #128]
L_curve25519_inv_7:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_7
	# Store
	stp	x6, x7, [x29, #112]
	stp	x8, x9, [x29, #128]
	add	x0, x29, #0x50
	add	x1, x29, #0x70
	add	x2, x29, #0x50
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	# Loop: 5 times
	mov	x24, #5
	ldp	x6, x7, [x29, #80]
	ldp	x8, x9, [x29, #96]
L_curve25519_inv_8:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x24, x24, #1
	bne	L_curve25519_inv_8
	# Store
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	add	x0, x29, #16
	add	x1, x29, #0x50
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	ldr	x0, [x29, #176]
	# Multiply
	ldp	x6, x7, [x0]
	ldp	x8, x9, [x0, #16]
	ldp	x10, x11, [x29, #16]
	ldp	x12, x13, [x29, #32]
	# A[0] * B[0]
	umulh	x15, x6, x10
	mul	x14, x6, x10
	# A[2] * B[0]
	umulh	x17, x8, x10
	mul	x16, x8, x10
	# A[1] * B[0]
	mul	x3, x7, x10
	adds	x15, x15, x3
	umulh	x4, x7, x10
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x7, x13
	adc	x17, x17, xzr
	mul	x19, x7, x13
	# A[0] * B[1]
	mul	x3, x6, x11
	adds	x15, x15, x3
	umulh	x4, x6, x11
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x8, x11
	adcs	x17, x17, x3
	umulh	x4, x8, x11
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x7, x12
	adds	x17, x17, x3
	umulh	x4, x7, x12
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x6, x12
	adds	x16, x16, x3
	umulh	x4, x6, x12
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x7, x11
	adds	x16, x16, x3
	umulh	x4, x7, x11
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x9, x11
	adcs	x19, x19, x3
	umulh	x4, x9, x11
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x8, x12
	adds	x19, x19, x3
	umulh	x4, x8, x12
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x9, x13
	adcs	x21, x21, x3
	umulh	x22, x9, x13
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x6, x13
	adds	x17, x17, x3
	umulh	x4, x6, x13
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x8, x13
	adcs	x20, x20, x3
	umulh	x4, x8, x13
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x9, x10
	adds	x17, x17, x3
	umulh	x4, x9, x10
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x9, x12
	adcs	x20, x20, x3
	umulh	x4, x9, x12
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Reduce if top bit set
	mov	x3, #19
	and	x4, x3, x17, asr 63
	adds	x14, x14, x4
	adcs	x15, x15, xzr
	and	x17, x17, #0x7fffffffffffffff
	adcs	x16, x16, xzr
	adc	x17, x17, xzr
	adds	x4, x14, x3
	adcs	x4, x15, xzr
	adcs	x4, x16, xzr
	adc	x4, x17, xzr
	and	x4, x3, x4, asr 63
	adds	x14, x14, x4
	adcs	x15, x15, xzr
	mov	x4, #0x7fffffffffffffff
	adcs	x16, x16, xzr
	adc	x17, x17, xzr
	and	x17, x17, x4
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	mov	x0, xzr
	ldp	x17, x19, [x29, #200]
	ldp	x20, x21, [x29, #216]
	ldp	x22, x23, [x29, #232]
	ldp	x24, x25, [x29, #248]
	ldp	x26, x27, [x29, #264]
	ldr	x28, [x29, #280]
	ldp	x29, x30, [sp], #0x120
	ret
#ifndef __APPLE__
	.size	curve25519,.-curve25519
#endif /* __APPLE__ */
#ifdef HAVE_ED25519
/* fe_pow22523
 *
 * Field exponentiation by a fixed addition chain built from calls to fe_sq
 * and fe_mul plus seven inline repeated-squaring loops (5, 10, 20, 10, 50,
 * 100 and 50 iterations).
 *
 * NOTE(review): fe_sq and fe_mul are defined elsewhere in this file. The
 * notes below assume fe_sq(x0 = r, x1 = a) stores a^2 and
 * fe_mul(x0 = r, x1 = a, x2 = b) stores a*b - confirm. Under that
 * assumption the value written is z^(2^252 - 3), z being the input.
 *
 * In:  x0 = presumably the address of the 32-byte result (it is only saved
 *           here and handed to the final fe_mul as its x0)
 *      x1 = presumably the address of the 32-byte input z (handed to fe_sq
 *           and fe_mul as a source operand)
 *
 * Stack frame (144 bytes, addressed through x29):
 *   [x29, #0]    saved x29, x30
 *   [x29, #16]   t0, 32-byte temporary
 *   [x29, #48]   t1, 32-byte temporary
 *   [x29, #80]   t2, 32-byte temporary (written as #0x50 in address setup)
 *   [x29, #112]  copy of incoming x0 ("out" below)
 *   [x29, #120]  copy of incoming x1 ("z" below)
 *   [x29, #128]  saved x17, x23
 *
 * Chain (exponent of z held after each step, under the assumption above):
 *   t0:  2            fe_sq(t0, z)
 *   t1:  8            fe_sq(t1, t0), fe_sq(t1, t1)
 *   t1:  9            fe_mul(t1, z, t1)
 *   t0:  11           fe_mul(t0, t0, t1)
 *   t0:  22           fe_sq(t0, t0)
 *   t0:  2^5 - 1      fe_mul(t0, t1, t0)
 *   t0:  2^10 - 1     loop 1 (5 squarings into t1), fe_mul(t0, t1, t0)
 *   t1:  2^20 - 1     loop 2 (10 squarings into t1), fe_mul(t1, t1, t0)
 *   t1:  2^40 - 1     loop 3 (20 squarings into t2), fe_mul(t1, t2, t1)
 *   t0:  2^50 - 1     loop 4 (10 squarings into t1), fe_mul(t0, t1, t0)
 *   t1:  2^100 - 1    loop 5 (50 squarings into t1), fe_mul(t1, t1, t0)
 *   t1:  2^200 - 1    loop 6 (100 squarings into t2), fe_mul(t1, t2, t1)
 *   t0:  2^250 - 1    loop 7 (50 squarings into t1), fe_mul(t0, t1, t0)
 *   t0:  2^252 - 4    fe_sq twice
 *   out: 2^252 - 3    fe_mul(out, t0, z)
 *
 * Inside the loops x23 is the iteration counter, x6-x9 hold the running
 * value (4 x 64-bit limbs), x10-x17 receive the 512-bit square and x3-x5
 * are scratch. The loops do not touch x0-x2.
 *
 * Register setup lines wrapped in "#ifndef NDEBUG" write a value the
 * register was already given earlier in this function. With NDEBUG defined
 * they are omitted, so the code then depends on fe_sq and fe_mul returning
 * with x0/x1 unchanged - presumably true of their implementations; confirm
 * before modifying either routine.
 */
#ifndef __APPLE__
.text
.globl	fe_pow22523
.type	fe_pow22523,@function
.align	2
fe_pow22523:
#else
.section	__TEXT,__text
.globl	_fe_pow22523
.p2align	2
_fe_pow22523:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-144]!
	add	x29, sp, #0
	stp	x17, x23, [x29, #128]
	# pow22523
	/* Keep the incoming pointers in the frame for the calls below */
	str	x0, [x29, #112]
	str	x1, [x29, #120]
	/* fe_sq(t0, z) */
	add	x0, x29, #16
#ifndef NDEBUG
	ldr	x1, [x29, #120]
#endif /* !NDEBUG */
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* fe_sq(t1, t0) */
	add	x0, x29, #48
	add	x1, x29, #16
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* fe_sq(t1, t1) */
#ifndef NDEBUG
	add	x0, x29, #48
#endif /* !NDEBUG */
	add	x1, x29, #48
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* fe_mul(t1, z, t1) */
#ifndef NDEBUG
	add	x0, x29, #48
#endif /* !NDEBUG */
	ldr	x1, [x29, #120]
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* fe_mul(t0, t0, t1) */
	add	x0, x29, #16
	add	x1, x29, #16
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* fe_sq(t0, t0) */
#ifndef NDEBUG
	add	x0, x29, #16
#endif /* !NDEBUG */
#ifndef NDEBUG
	add	x1, x29, #16
#endif /* !NDEBUG */
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* fe_mul(t0, t1, t0) */
#ifndef NDEBUG
	add	x0, x29, #16
#endif /* !NDEBUG */
	add	x1, x29, #48
	add	x2, x29, #16
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Loop 1: read t0, square it 5 times in registers, write t1 */
	# Loop: 5 times
	mov	x23, #5
	ldp	x6, x7, [x29, #16]
	ldp	x8, x9, [x29, #32]
L_fe_pow22523_1:
	# Square
	/* Off-diagonal products A[i]*A[j], i < j, accumulated in x11..x16 */
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	/* Double the off-diagonal sum; the bit shifted out lands in x17 */
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	/* Add the diagonal products A[i]*A[i]; x10..x17 is then the full square */
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	/* Limbs x10..x17 hold the 512-bit square, least significant first.
	 * Fold 38 * x17 into limb 3 (x13); x5 collects the overflow. */
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	/* x5 = 19 * (bits 63 and above of x5:x13); x13 keeps its low 63 bits */
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	/* Add the low halves of 38 * x14, x15, x16 to limbs 0..2. The high
	 * halves replace x14..x16 and are added one limb up below. */
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x23, x23, #1
	bne	L_fe_pow22523_1
	# Store
	stp	x6, x7, [x29, #48]
	stp	x8, x9, [x29, #64]
	/* fe_mul(t0, t1, t0) */
#ifndef NDEBUG
	add	x0, x29, #16
#endif /* !NDEBUG */
#ifndef NDEBUG
	add	x1, x29, #48
#endif /* !NDEBUG */
	add	x2, x29, #16
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Loop 2: read t0, square it 10 times in registers, write t1.
	 * Same squaring and reduction sequence as loop 1. */
	# Loop: 10 times
	mov	x23, #10
	ldp	x6, x7, [x29, #16]
	ldp	x8, x9, [x29, #32]
L_fe_pow22523_2:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x23, x23, #1
	bne	L_fe_pow22523_2
	# Store
	stp	x6, x7, [x29, #48]
	stp	x8, x9, [x29, #64]
	/* fe_mul(t1, t1, t0) */
	add	x0, x29, #48
#ifndef NDEBUG
	add	x1, x29, #48
#endif /* !NDEBUG */
	add	x2, x29, #16
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Loop 3: read t1, square it 20 times in registers, write t2.
	 * Same squaring and reduction sequence as loop 1. */
	# Loop: 20 times
	mov	x23, #20
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_pow22523_3:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x23, x23, #1
	bne	L_fe_pow22523_3
	# Store
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	/* fe_mul(t1, t2, t1) */
#ifndef NDEBUG
	add	x0, x29, #48
#endif /* !NDEBUG */
	add	x1, x29, #0x50
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Loop 4: read t1, square it 10 times in registers, write t1.
	 * Same squaring and reduction sequence as loop 1. */
	# Loop: 10 times
	mov	x23, #10
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_pow22523_4:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x23, x23, #1
	bne	L_fe_pow22523_4
	# Store
	stp	x6, x7, [x29, #48]
	stp	x8, x9, [x29, #64]
	/* fe_mul(t0, t1, t0) */
	add	x0, x29, #16
	add	x1, x29, #48
	add	x2, x29, #16
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Loop 5: read t0, square it 50 times in registers, write t1.
	 * Same squaring and reduction sequence as loop 1. */
	# Loop: 50 times
	mov	x23, #50
	ldp	x6, x7, [x29, #16]
	ldp	x8, x9, [x29, #32]
L_fe_pow22523_5:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x23, x23, #1
	bne	L_fe_pow22523_5
	# Store
	stp	x6, x7, [x29, #48]
	stp	x8, x9, [x29, #64]
	/* fe_mul(t1, t1, t0) */
	add	x0, x29, #48
#ifndef NDEBUG
	add	x1, x29, #48
#endif /* !NDEBUG */
	add	x2, x29, #16
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Loop 6: read t1, square it 100 times in registers, write t2.
	 * Same squaring and reduction sequence as loop 1. */
	# Loop: 100 times
	mov	x23, #0x64
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_pow22523_6:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x23, x23, #1
	bne	L_fe_pow22523_6
	# Store
	stp	x6, x7, [x29, #80]
	stp	x8, x9, [x29, #96]
	/* fe_mul(t1, t2, t1) */
#ifndef NDEBUG
	add	x0, x29, #48
#endif /* !NDEBUG */
	add	x1, x29, #0x50
	add	x2, x29, #48
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Loop 7: read t1, square it 50 times in registers, write t1.
	 * Same squaring and reduction sequence as loop 1. */
	# Loop: 50 times
	mov	x23, #50
	ldp	x6, x7, [x29, #48]
	ldp	x8, x9, [x29, #64]
L_fe_pow22523_7:
	# Square
	#  A[0] * A[1]
	umulh	x12, x6, x7
	mul	x11, x6, x7
	#  A[0] * A[3]
	umulh	x14, x6, x9
	mul	x13, x6, x9
	#  A[0] * A[2]
	mul	x3, x6, x8
	adds	x12, x12, x3
	umulh	x4, x6, x8
	adcs	x13, x13, x4
	#  A[1] * A[3]
	mul	x3, x7, x9
	adcs	x14, x14, x3
	umulh	x15, x7, x9
	adc	x15, x15, xzr
	#  A[1] * A[2]
	mul	x3, x7, x8
	adds	x13, x13, x3
	umulh	x4, x7, x8
	adcs	x14, x14, x4
	#  A[2] * A[3]
	mul	x3, x8, x9
	adcs	x15, x15, x3
	umulh	x16, x8, x9
	adc	x16, x16, xzr
	# Double
	adds	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adcs	x15, x15, x15
	adcs	x16, x16, x16
	adc	x17, xzr, xzr
	#  A[0] * A[0]
	umulh	x4, x6, x6
	mul	x10, x6, x6
	#  A[1] * A[1]
	mul	x3, x7, x7
	adds	x11, x11, x4
	umulh	x4, x7, x7
	adcs	x12, x12, x3
	#  A[2] * A[2]
	mul	x3, x8, x8
	adcs	x13, x13, x4
	umulh	x4, x8, x8
	adcs	x14, x14, x3
	#  A[3] * A[3]
	mul	x3, x9, x9
	adcs	x15, x15, x4
	umulh	x4, x9, x9
	adcs	x16, x16, x3
	adc	x17, x17, x4
	# Reduce
	mov	x3, #38
	mul	x4, x3, x17
	adds	x13, x13, x4
	umulh	x5, x3, x17
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x13, #63
	mul	x5, x5, x3
	and	x13, x13, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x14
	adds	x10, x10, x4
	umulh	x14, x3, x14
	mul	x4, x3, x15
	adcs	x11, x11, x4
	umulh	x15, x3, x15
	mul	x4, x3, x16
	adcs	x12, x12, x4
	umulh	x16, x3, x16
	adc	x13, x13, xzr
	#  Add high product results in
	adds	x6, x10, x5
	adcs	x7, x11, x14
	adcs	x8, x12, x15
	adc	x9, x13, x16
	subs	x23, x23, #1
	bne	L_fe_pow22523_7
	# Store
	stp	x6, x7, [x29, #48]
	stp	x8, x9, [x29, #64]
	/* fe_mul(t0, t1, t0) */
	add	x0, x29, #16
	add	x1, x29, #48
	add	x2, x29, #16
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* fe_sq(t0, t0), first of two consecutive squarings */
#ifndef NDEBUG
	add	x0, x29, #16
#endif /* !NDEBUG */
	add	x1, x29, #16
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* Second squaring: no register setup in any build, so x0 and x1 are
	 * whatever the call above returned with. Intended to be fe_sq(t0, t0)
	 * again; presumably fe_sq leaves x0/x1 unchanged - confirm. */
#ifndef __APPLE__
	bl	fe_sq
#else
	bl	_fe_sq
#endif /* __APPLE__ */
	/* fe_mul(out, t0, z) using the pointers saved on entry */
	ldr	x0, [x29, #112]
#ifndef NDEBUG
	add	x1, x29, #16
#endif /* !NDEBUG */
	ldr	x2, [x29, #120]
#ifndef __APPLE__
	bl	fe_mul
#else
	bl	_fe_mul
#endif /* __APPLE__ */
	/* Restore saved registers and release the 144-byte (0x90) frame */
	ldp	x17, x23, [x29, #128]
	ldp	x29, x30, [sp], #0x90
	ret
#ifndef __APPLE__
	.size	fe_pow22523,.-fe_pow22523
#endif /* __APPLE__ */
/* ge_p1p1_to_p2
 *
 * Leaf routine (no calls). With r = incoming x0 and p = incoming x1, it
 * writes three field products to r, each the product of two 4-limb
 * (32-byte) elements read from p:
 *   [r + 0x00] = [p + 0x60] * [p + 0x00]
 *   [r + 0x40] = [p + 0x60] * [p + 0x40]
 *   [r + 0x20] = [p + 0x20] * [p + 0x40]
 * NOTE(review): by the usual ge naming these offsets would be X, Y, Z, T
 * (0x00, 0x20, 0x40, 0x60), i.e. r->X = p->X * p->T, r->Z = p->Z * p->T
 * and r->Y = p->Y * p->Z - confirm against the C struct definitions.
 *
 * Each product is a 4x4-limb schoolbook multiply: "A" is x10-x13, "B" is
 * x6-x9, the low 256 bits accumulate in x14-x17 and the high 256 bits in
 * x19-x22. One folding pass follows, multiplying the high limbs by 38 and
 * the bits from position 255 upward by 19 (2^256 == 38 and 2^255 == 19
 * modulo 2^255 - 19). The folded value is stored directly; this function
 * applies no further normalisation step.
 *
 * Stack frame (80 bytes, addressed through x29):
 *   [x29, #0]   saved x29, x30
 *   [x29, #16]  copy of incoming x0
 *   [x29, #24]  copy of incoming x1
 *   [x29, #40]  saved x17, x19
 *   [x29, #56]  saved x20, x21
 *   [x29, #72]  saved x22
 * The two pointer copies are written but never read back here.
 * x3-x5 are scratch throughout.
 */
#ifndef __APPLE__
.text
.globl	ge_p1p1_to_p2
.type	ge_p1p1_to_p2,@function
.align	2
ge_p1p1_to_p2:
#else
.section	__TEXT,__text
.globl	_ge_p1p1_to_p2
.p2align	2
_ge_p1p1_to_p2:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-80]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #40]
	stp	x20, x21, [x29, #56]
	str	x22, [x29, #72]
	str	x0, [x29, #16]
	str	x1, [x29, #24]
	/* Product 1: x2 = p + 0x00, x1 = p + 0x60, result goes to r + 0x00 */
	mov	x2, x1
	add	x1, x1, #0x60
	# Multiply
	/* A (x10-x13) = element at x1, B (x6-x9) = element at x2 */
	ldp	x10, x11, [x1]
	ldp	x12, x13, [x1, #16]
	ldp	x6, x7, [x2]
	ldp	x8, x9, [x2, #16]
	# A[0] * B[0]
	umulh	x15, x10, x6
	mul	x14, x10, x6
	# A[2] * B[0]
	umulh	x17, x12, x6
	mul	x16, x12, x6
	# A[1] * B[0]
	mul	x3, x11, x6
	adds	x15, x15, x3
	umulh	x4, x11, x6
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x11, x9
	adc	x17, x17, xzr
	mul	x19, x11, x9
	# A[0] * B[1]
	mul	x3, x10, x7
	adds	x15, x15, x3
	umulh	x4, x10, x7
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x12, x7
	adcs	x17, x17, x3
	umulh	x4, x12, x7
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x11, x8
	adds	x17, x17, x3
	umulh	x4, x11, x8
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x10, x8
	adds	x16, x16, x3
	umulh	x4, x10, x8
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x11, x7
	adds	x16, x16, x3
	umulh	x4, x11, x7
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x13, x7
	adcs	x19, x19, x3
	umulh	x4, x13, x7
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x12, x8
	adds	x19, x19, x3
	umulh	x4, x12, x8
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x13, x9
	adcs	x21, x21, x3
	umulh	x22, x13, x9
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x10, x9
	adds	x17, x17, x3
	umulh	x4, x10, x9
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x12, x9
	adcs	x20, x20, x3
	umulh	x4, x12, x9
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x13, x6
	adds	x17, x17, x3
	umulh	x4, x13, x6
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x13, x8
	adcs	x20, x20, x3
	umulh	x4, x13, x8
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# Reduce
	/* Product limbs, least significant first: x14-x17 then x19-x22.
	 * Fold 38 * x22 into limb 3 (x17); x5 collects the overflow. */
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	/* x5 = 19 * (bits 63 and above of x5:x17); x17 keeps its low 63 bits */
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	/* Add the low halves of 38 * x19, x20, x21 to limbs 0..2. The high
	 * halves replace x19..x21 and are added one limb up below. */
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	/* Product 2: x2 = p + 0x40, x0 = r + 0x40. A (x10-x13) still holds
	 * the element at p + 0x60; only B is reloaded. */
	sub	x2, x1, #32
	add	x0, x0, #0x40
	# Multiply
	ldp	x6, x7, [x2]
	ldp	x8, x9, [x2, #16]
	# A[0] * B[0]
	umulh	x15, x10, x6
	mul	x14, x10, x6
	# A[2] * B[0]
	umulh	x17, x12, x6
	mul	x16, x12, x6
	# A[1] * B[0]
	mul	x3, x11, x6
	adds	x15, x15, x3
	umulh	x4, x11, x6
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x11, x9
	adc	x17, x17, xzr
	mul	x19, x11, x9
	# A[0] * B[1]
	mul	x3, x10, x7
	adds	x15, x15, x3
	umulh	x4, x10, x7
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x12, x7
	adcs	x17, x17, x3
	umulh	x4, x12, x7
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x11, x8
	adds	x17, x17, x3
	umulh	x4, x11, x8
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x10, x8
	adds	x16, x16, x3
	umulh	x4, x10, x8
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x11, x7
	adds	x16, x16, x3
	umulh	x4, x11, x7
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x13, x7
	adcs	x19, x19, x3
	umulh	x4, x13, x7
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x12, x8
	adds	x19, x19, x3
	umulh	x4, x12, x8
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x13, x9
	adcs	x21, x21, x3
	umulh	x22, x13, x9
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x10, x9
	adds	x17, x17, x3
	umulh	x4, x10, x9
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x12, x9
	adcs	x20, x20, x3
	umulh	x4, x12, x9
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x13, x6
	adds	x17, x17, x3
	umulh	x4, x13, x6
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x13, x8
	adcs	x20, x20, x3
	umulh	x4, x13, x8
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	/* Same folding pass as after product 1 */
	# Reduce
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	/* Product 3: x1 = p + 0x20, x0 = r + 0x20. B (x6-x9) still holds
	 * the element at p + 0x40; only A is reloaded. */
	sub	x1, x1, #0x40
	sub	x0, x0, #32
	# Multiply
	ldp	x10, x11, [x1]
	ldp	x12, x13, [x1, #16]
	# A[0] * B[0]
	umulh	x15, x10, x6
	mul	x14, x10, x6
	# A[2] * B[0]
	umulh	x17, x12, x6
	mul	x16, x12, x6
	# A[1] * B[0]
	mul	x3, x11, x6
	adds	x15, x15, x3
	umulh	x4, x11, x6
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x11, x9
	adc	x17, x17, xzr
	mul	x19, x11, x9
	# A[0] * B[1]
	mul	x3, x10, x7
	adds	x15, x15, x3
	umulh	x4, x10, x7
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x12, x7
	adcs	x17, x17, x3
	umulh	x4, x12, x7
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x11, x8
	adds	x17, x17, x3
	umulh	x4, x11, x8
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x10, x8
	adds	x16, x16, x3
	umulh	x4, x10, x8
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x11, x7
	adds	x16, x16, x3
	umulh	x4, x11, x7
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x13, x7
	adcs	x19, x19, x3
	umulh	x4, x13, x7
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x12, x8
	adds	x19, x19, x3
	umulh	x4, x12, x8
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x13, x9
	adcs	x21, x21, x3
	umulh	x22, x13, x9
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x10, x9
	adds	x17, x17, x3
	umulh	x4, x10, x9
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x12, x9
	adcs	x20, x20, x3
	umulh	x4, x12, x9
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x13, x6
	adds	x17, x17, x3
	umulh	x4, x13, x6
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x13, x8
	adcs	x20, x20, x3
	umulh	x4, x13, x8
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	/* Same folding pass as after product 1 */
	# Reduce
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	/* Restore saved registers and release the 80-byte (0x50) frame */
	ldp	x17, x19, [x29, #40]
	ldp	x20, x21, [x29, #56]
	ldr	x22, [x29, #72]
	ldp	x29, x30, [sp], #0x50
	ret
#ifndef __APPLE__
	.size	ge_p1p1_to_p2,.-ge_p1p1_to_p2
#endif /* __APPLE__ */
/* ge_p1p1_to_p3
 *
 * Computes four products of 256-bit values held as 4 x 64-bit limbs and
 * stores each one as 4 limbs:
 *   out[0x00] = in[0x60] * in[0x00]
 *   out[0x60] = in[0x20] * in[0x00]
 *   out[0x20] = in[0x20] * in[0x40]
 *   out[0x40] = in[0x60] * in[0x40]
 * Each 512-bit product is folded back to 4 limbs with the constants 38 and
 * 19 (2^256 = 38 and 2^255 = 19 modulo 2^255 - 19).
 *
 * NOTE(review): the 32-byte slots at offsets 0x00/0x20/0x40/0x60 are
 * presumably the X, Y, Z and T field elements of the point structures -
 * confirm against the C declarations.
 *
 * x0: out - destination, four 32-byte slots are written.
 * x1: in  - source, four 32-byte slots are read.
 *
 * x17 and x19-x26 are saved in the 112-byte frame and restored before
 * returning. x0-x16 are modified and not restored.
 */
#ifndef __APPLE__
.text
.globl	ge_p1p1_to_p3
.type	ge_p1p1_to_p3,@function
.align	2
ge_p1p1_to_p3:
#else
.section	__TEXT,__text
.globl	_ge_p1p1_to_p3
.p2align	2
_ge_p1p1_to_p3:
#endif /* __APPLE__ */
	# Frame: x29/x30 at #0, x0/x1 at #16/#24, saved registers from #40
	stp	x29, x30, [sp, #-112]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #40]
	stp	x20, x21, [x29, #56]
	stp	x22, x23, [x29, #72]
	stp	x24, x25, [x29, #88]
	str	x26, [x29, #104]
	# The two arguments are stored here but never reloaded in this function
	str	x0, [x29, #16]
	str	x1, [x29, #24]
	# Product 1: out[0x00] = in[0x60] * in[0x00]
	# A = in + 0x60 (via x1) in x10-x13, B = in + 0x00 (via x2) in x6-x9
	mov	x2, x1
	add	x1, x1, #0x60
	# Multiply
	ldp	x10, x11, [x1]
	ldp	x12, x13, [x1, #16]
	ldp	x6, x7, [x2]
	ldp	x8, x9, [x2, #16]
	# A[0] * B[0]
	umulh	x15, x10, x6
	mul	x14, x10, x6
	# A[2] * B[0]
	umulh	x17, x12, x6
	mul	x16, x12, x6
	# A[1] * B[0]
	mul	x3, x11, x6
	adds	x15, x15, x3
	umulh	x4, x11, x6
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x11, x9
	adc	x17, x17, xzr
	mul	x19, x11, x9
	# A[0] * B[1]
	mul	x3, x10, x7
	adds	x15, x15, x3
	umulh	x4, x10, x7
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x12, x7
	adcs	x17, x17, x3
	umulh	x4, x12, x7
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x11, x8
	adds	x17, x17, x3
	umulh	x4, x11, x8
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x10, x8
	adds	x16, x16, x3
	umulh	x4, x10, x8
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x11, x7
	adds	x16, x16, x3
	umulh	x4, x11, x7
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x13, x7
	adcs	x19, x19, x3
	umulh	x4, x13, x7
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x12, x8
	adds	x19, x19, x3
	umulh	x4, x12, x8
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x13, x9
	adcs	x21, x21, x3
	umulh	x22, x13, x9
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x10, x9
	adds	x17, x17, x3
	umulh	x4, x10, x9
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x12, x9
	adcs	x20, x20, x3
	umulh	x4, x12, x9
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x13, x6
	adds	x17, x17, x3
	umulh	x4, x13, x6
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x13, x8
	adcs	x20, x20, x3
	umulh	x4, x13, x8
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# Reduce
	# High limbs x19-x22 are folded into x14-x17: top limb times 38 first,
	# then bit 255 and above times 19, then the other high limbs times 38
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	# Product 2: out[0x60] = in[0x20] * in[0x00]
	# A = in + 0x20 loaded into x23-x26, B is still in x6-x9
	sub	x1, x1, #0x40
	add	x0, x0, #0x60
	# Multiply
	ldp	x23, x24, [x1]
	ldp	x25, x26, [x1, #16]
	# A[0] * B[0]
	umulh	x15, x23, x6
	mul	x14, x23, x6
	# A[2] * B[0]
	umulh	x17, x25, x6
	mul	x16, x25, x6
	# A[1] * B[0]
	mul	x3, x24, x6
	adds	x15, x15, x3
	umulh	x4, x24, x6
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x24, x9
	adc	x17, x17, xzr
	mul	x19, x24, x9
	# A[0] * B[1]
	mul	x3, x23, x7
	adds	x15, x15, x3
	umulh	x4, x23, x7
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x25, x7
	adcs	x17, x17, x3
	umulh	x4, x25, x7
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x24, x8
	adds	x17, x17, x3
	umulh	x4, x24, x8
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x23, x8
	adds	x16, x16, x3
	umulh	x4, x23, x8
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x24, x7
	adds	x16, x16, x3
	umulh	x4, x24, x7
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x26, x7
	adcs	x19, x19, x3
	umulh	x4, x26, x7
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x25, x8
	adds	x19, x19, x3
	umulh	x4, x25, x8
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x26, x9
	adcs	x21, x21, x3
	umulh	x22, x26, x9
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x23, x9
	adds	x17, x17, x3
	umulh	x4, x23, x9
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x25, x9
	adcs	x20, x20, x3
	umulh	x4, x25, x9
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x26, x6
	adds	x17, x17, x3
	umulh	x4, x26, x6
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x26, x8
	adcs	x20, x20, x3
	umulh	x4, x26, x8
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	# Product 3: out[0x20] = in[0x20] * in[0x40]
	# B = in + 0x40 reloaded into x6-x9, A is still in x23-x26
	add	x2, x1, #32
	sub	x0, x0, #0x40
	# Multiply
	ldp	x6, x7, [x2]
	ldp	x8, x9, [x2, #16]
	# A[0] * B[0]
	umulh	x15, x23, x6
	mul	x14, x23, x6
	# A[2] * B[0]
	umulh	x17, x25, x6
	mul	x16, x25, x6
	# A[1] * B[0]
	mul	x3, x24, x6
	adds	x15, x15, x3
	umulh	x4, x24, x6
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x24, x9
	adc	x17, x17, xzr
	mul	x19, x24, x9
	# A[0] * B[1]
	mul	x3, x23, x7
	adds	x15, x15, x3
	umulh	x4, x23, x7
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x25, x7
	adcs	x17, x17, x3
	umulh	x4, x25, x7
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x24, x8
	adds	x17, x17, x3
	umulh	x4, x24, x8
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x23, x8
	adds	x16, x16, x3
	umulh	x4, x23, x8
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x24, x7
	adds	x16, x16, x3
	umulh	x4, x24, x7
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x26, x7
	adcs	x19, x19, x3
	umulh	x4, x26, x7
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x25, x8
	adds	x19, x19, x3
	umulh	x4, x25, x8
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x26, x9
	adcs	x21, x21, x3
	umulh	x22, x26, x9
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x23, x9
	adds	x17, x17, x3
	umulh	x4, x23, x9
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x25, x9
	adcs	x20, x20, x3
	umulh	x4, x25, x9
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x26, x6
	adds	x17, x17, x3
	umulh	x4, x26, x6
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x26, x8
	adcs	x20, x20, x3
	umulh	x4, x26, x8
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	# Product 4: out[0x40] = in[0x60] * in[0x40]
	# No loads: A is still in x10-x13 from product 1, B in x6-x9 from product 3
	add	x1, x1, #0x40
	add	x0, x0, #32
	# Multiply
	# A[0] * B[0]
	umulh	x15, x10, x6
	mul	x14, x10, x6
	# A[2] * B[0]
	umulh	x17, x12, x6
	mul	x16, x12, x6
	# A[1] * B[0]
	mul	x3, x11, x6
	adds	x15, x15, x3
	umulh	x4, x11, x6
	adcs	x16, x16, x4
	# A[1] * B[3]
	umulh	x20, x11, x9
	adc	x17, x17, xzr
	mul	x19, x11, x9
	# A[0] * B[1]
	mul	x3, x10, x7
	adds	x15, x15, x3
	umulh	x4, x10, x7
	adcs	x16, x16, x4
	# A[2] * B[1]
	mul	x3, x12, x7
	adcs	x17, x17, x3
	umulh	x4, x12, x7
	adcs	x19, x19, x4
	adc	x20, x20, xzr
	# A[1] * B[2]
	mul	x3, x11, x8
	adds	x17, x17, x3
	umulh	x4, x11, x8
	adcs	x19, x19, x4
	adcs	x20, x20, xzr
	adc	x21, xzr, xzr
	# A[0] * B[2]
	mul	x3, x10, x8
	adds	x16, x16, x3
	umulh	x4, x10, x8
	adcs	x17, x17, x4
	adcs	x19, x19, xzr
	adcs	x20, x20, xzr
	adc	x21, x21, xzr
	# A[1] * B[1]
	mul	x3, x11, x7
	adds	x16, x16, x3
	umulh	x4, x11, x7
	adcs	x17, x17, x4
	# A[3] * B[1]
	mul	x3, x13, x7
	adcs	x19, x19, x3
	umulh	x4, x13, x7
	adcs	x20, x20, x4
	adc	x21, x21, xzr
	# A[2] * B[2]
	mul	x3, x12, x8
	adds	x19, x19, x3
	umulh	x4, x12, x8
	adcs	x20, x20, x4
	# A[3] * B[3]
	mul	x3, x13, x9
	adcs	x21, x21, x3
	umulh	x22, x13, x9
	adc	x22, x22, xzr
	# A[0] * B[3]
	mul	x3, x10, x9
	adds	x17, x17, x3
	umulh	x4, x10, x9
	adcs	x19, x19, x4
	# A[2] * B[3]
	mul	x3, x12, x9
	adcs	x20, x20, x3
	umulh	x4, x12, x9
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# A[3] * B[0]
	mul	x3, x13, x6
	adds	x17, x17, x3
	umulh	x4, x13, x6
	adcs	x19, x19, x4
	# A[3] * B[2]
	mul	x3, x13, x8
	adcs	x20, x20, x3
	umulh	x4, x13, x8
	adcs	x21, x21, x4
	adc	x22, x22, xzr
	# Reduce
	mov	x3, #38
	mul	x4, x3, x22
	adds	x17, x17, x4
	umulh	x5, x3, x22
	adc	x5, x5, xzr
	mov	x3, #19
	extr	x5, x5, x17, #63
	mul	x5, x5, x3
	and	x17, x17, #0x7fffffffffffffff
	mov	x3, #38
	mul	x4, x3, x19
	adds	x14, x14, x4
	umulh	x19, x3, x19
	mul	x4, x3, x20
	adcs	x15, x15, x4
	umulh	x20, x3, x20
	mul	x4, x3, x21
	adcs	x16, x16, x4
	umulh	x21, x3, x21
	adc	x17, x17, xzr
	#  Add high product results in
	adds	x14, x14, x5
	adcs	x15, x15, x19
	adcs	x16, x16, x20
	adc	x17, x17, x21
	# Store
	stp	x14, x15, [x0]
	stp	x16, x17, [x0, #16]
	# Restore the registers saved in the prologue and release the frame
	ldp	x17, x19, [x29, #40]
	ldp	x20, x21, [x29, #56]
	ldp	x22, x23, [x29, #72]
	ldp	x24, x25, [x29, #88]
	ldr	x26, [x29, #104]
	ldp	x29, x30, [sp], #0x70
	ret
#ifndef __APPLE__
	.size	ge_p1p1_to_p3,.-ge_p1p1_to_p3
#endif /* __APPLE__ */
/* ge_p2_dbl
 *
 * Works on 256-bit values held as 4 x 64-bit limbs. With
 *   a = in[0x00], b = in[0x20], c = in[0x40]
 * the function stores:
 *   out[0x20] = b^2 + a^2
 *   out[0x40] = b^2 - a^2
 *   out[0x00] = (a + b)^2 - out[0x20]
 *   out[0x60] = 2 * c^2 - out[0x40]
 * Squares are folded back to 4 limbs with the constants 38 and 19
 * (2^256 = 38 and 2^255 = 19 modulo 2^255 - 19). Sums and differences are
 * corrected by a multiple of 19 derived from the carry/borrow and bit 255.
 *
 * out[0x40] is also used as scratch: a^2 is stored there first and later
 * overwritten with b^2 - a^2.
 *
 * NOTE(review): the 32-byte slots are presumably the X, Y, Z (and T) field
 * elements of the point structures - confirm against the C declarations.
 *
 * x0: out - destination, four 32-byte slots are written.
 * x1: in  - source, three 32-byte slots are read.
 *
 * x17 and x19-x28 are saved in the 128-byte frame and restored before
 * returning. x0-x16 are modified and not restored.
 */
#ifndef __APPLE__
.text
.globl	ge_p2_dbl
.type	ge_p2_dbl,@function
.align	2
ge_p2_dbl:
#else
.section	__TEXT,__text
.globl	_ge_p2_dbl
.p2align	2
_ge_p2_dbl:
#endif /* __APPLE__ */
	# Frame: x29/x30 at #0, x0/x1 at #16/#24, saved registers from #40
	stp	x29, x30, [sp, #-128]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #40]
	stp	x20, x21, [x29, #56]
	stp	x22, x23, [x29, #72]
	stp	x24, x25, [x29, #88]
	stp	x26, x27, [x29, #104]
	str	x28, [x29, #120]
	str	x0, [x29, #16]
	# The in pointer is reloaded from [x29, #24] twice further down
	str	x1, [x29, #24]
	# Step 1: a^2 into x8-x11, also stored at out + 0x40 (x0 = out + 0x40)
	add	x0, x0, #0x40
	# Square
	ldp	x4, x5, [x1]
	ldp	x6, x7, [x1, #16]
	#  A[0] * A[1]
	umulh	x10, x4, x5
	mul	x9, x4, x5
	#  A[0] * A[3]
	umulh	x12, x4, x7
	mul	x11, x4, x7
	#  A[0] * A[2]
	mul	x25, x4, x6
	adds	x10, x10, x25
	umulh	x26, x4, x6
	adcs	x11, x11, x26
	#  A[1] * A[3]
	mul	x25, x5, x7
	adcs	x12, x12, x25
	umulh	x13, x5, x7
	adc	x13, x13, xzr
	#  A[1] * A[2]
	mul	x25, x5, x6
	adds	x11, x11, x25
	umulh	x26, x5, x6
	adcs	x12, x12, x26
	#  A[2] * A[3]
	mul	x25, x6, x7
	adcs	x13, x13, x25
	umulh	x14, x6, x7
	adc	x14, x14, xzr
	# Double
	adds	x9, x9, x9
	adcs	x10, x10, x10
	adcs	x11, x11, x11
	adcs	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adc	x15, xzr, xzr
	#  A[0] * A[0]
	umulh	x26, x4, x4
	mul	x8, x4, x4
	#  A[1] * A[1]
	mul	x25, x5, x5
	adds	x9, x9, x26
	umulh	x26, x5, x5
	adcs	x10, x10, x25
	#  A[2] * A[2]
	mul	x25, x6, x6
	adcs	x11, x11, x26
	umulh	x26, x6, x6
	adcs	x12, x12, x25
	#  A[3] * A[3]
	mul	x25, x7, x7
	adcs	x13, x13, x26
	umulh	x26, x7, x7
	adcs	x14, x14, x25
	adc	x15, x15, x26
	# Reduce
	# High limbs x12-x15 are folded into x8-x11: top limb times 38 first,
	# then bit 255 and above times 19, then the other high limbs times 38
	mov	x25, #38
	mul	x26, x25, x15
	adds	x11, x11, x26
	umulh	x27, x25, x15
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x11, #63
	mul	x27, x27, x25
	and	x11, x11, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x12
	adds	x8, x8, x26
	umulh	x12, x25, x12
	mul	x26, x25, x13
	adcs	x9, x9, x26
	umulh	x13, x25, x13
	mul	x26, x25, x14
	adcs	x10, x10, x26
	umulh	x14, x25, x14
	adc	x11, x11, xzr
	#  Add high product results in
	adds	x8, x8, x27
	adcs	x9, x9, x12
	adcs	x10, x10, x13
	adc	x11, x11, x14
	# Store
	stp	x8, x9, [x0]
	stp	x10, x11, [x0, #16]
	# Step 2: b^2 into x21-x24; b itself stays in x16, x17, x19, x20
	# x2 = in + 0x20, x0 = out + 0x20
	add	x2, x1, #32
	sub	x0, x0, #32
	# Square
	ldp	x16, x17, [x2]
	ldp	x19, x20, [x2, #16]
	#  A[0] * A[1]
	umulh	x23, x16, x17
	mul	x22, x16, x17
	#  A[0] * A[3]
	umulh	x4, x16, x20
	mul	x24, x16, x20
	#  A[0] * A[2]
	mul	x25, x16, x19
	adds	x23, x23, x25
	umulh	x26, x16, x19
	adcs	x24, x24, x26
	#  A[1] * A[3]
	mul	x25, x17, x20
	adcs	x4, x4, x25
	umulh	x5, x17, x20
	adc	x5, x5, xzr
	#  A[1] * A[2]
	mul	x25, x17, x19
	adds	x24, x24, x25
	umulh	x26, x17, x19
	adcs	x4, x4, x26
	#  A[2] * A[3]
	mul	x25, x19, x20
	adcs	x5, x5, x25
	umulh	x6, x19, x20
	adc	x6, x6, xzr
	# Double
	adds	x22, x22, x22
	adcs	x23, x23, x23
	adcs	x24, x24, x24
	adcs	x4, x4, x4
	adcs	x5, x5, x5
	adcs	x6, x6, x6
	adc	x7, xzr, xzr
	#  A[0] * A[0]
	umulh	x26, x16, x16
	mul	x21, x16, x16
	#  A[1] * A[1]
	mul	x25, x17, x17
	adds	x22, x22, x26
	umulh	x26, x17, x17
	adcs	x23, x23, x25
	#  A[2] * A[2]
	mul	x25, x19, x19
	adcs	x24, x24, x26
	umulh	x26, x19, x19
	adcs	x4, x4, x25
	#  A[3] * A[3]
	mul	x25, x20, x20
	adcs	x5, x5, x26
	umulh	x26, x20, x20
	adcs	x6, x6, x25
	adc	x7, x7, x26
	# Reduce
	mov	x25, #38
	mul	x26, x25, x7
	adds	x24, x24, x26
	umulh	x27, x25, x7
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x24, #63
	mul	x27, x27, x25
	and	x24, x24, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x4
	adds	x21, x21, x26
	umulh	x4, x25, x4
	mul	x26, x25, x5
	adcs	x22, x22, x26
	umulh	x5, x25, x5
	mul	x26, x25, x6
	adcs	x23, x23, x26
	umulh	x6, x25, x6
	adc	x24, x24, xzr
	#  Add high product results in
	adds	x21, x21, x27
	adcs	x22, x22, x4
	adcs	x23, x23, x5
	adc	x24, x24, x6
	# Step 3: out[0x20] = b^2 + a^2 (x4-x7), out[0x40] = b^2 - a^2 (x12-x15)
	# x0 = out + 0x20, x1 = out + 0x40
	add	x3, x0, #32
	mov	x2, x0
	add	x1, x0, #32
	# Add
	adds	x4, x21, x8
	adcs	x5, x22, x9
	adcs	x6, x23, x10
	adcs	x7, x24, x11
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x7, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x4, x4, x25
	adcs	x5, x5, xzr
	and	x7, x7, #0x7fffffffffffffff
	adcs	x6, x6, xzr
	adc	x7, x7, xzr
	# Sub
	subs	x12, x21, x8
	sbcs	x13, x22, x9
	sbcs	x14, x23, x10
	sbcs	x15, x24, x11
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	stp	x4, x5, [x0]
	stp	x6, x7, [x0, #16]
	stp	x12, x13, [x1]
	stp	x14, x15, [x1, #16]
	# Step 4: out[0x00] = (a + b)^2 - out[0x20]
	# x1 = in (reloaded), x0 = out; b is still in x16, x17, x19, x20
	ldr	x1, [x29, #24]
	add	x2, x1, #32
	sub	x0, x0, #32
	# Add
	ldp	x8, x9, [x1]
	ldp	x10, x11, [x1, #16]
	adds	x8, x8, x16
	adcs	x9, x9, x17
	adcs	x10, x10, x19
	adcs	x11, x11, x20
	cset	x28, cs
	mov	x25, #19
	#   Mask the modulus
	extr	x28, x28, x11, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x8, x8, x25
	adcs	x9, x9, xzr
	and	x11, x11, #0x7fffffffffffffff
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	mov	x1, x0
	# Square
	#  A[0] * A[1]
	umulh	x23, x8, x9
	mul	x22, x8, x9
	#  A[0] * A[3]
	umulh	x4, x8, x11
	mul	x24, x8, x11
	#  A[0] * A[2]
	mul	x25, x8, x10
	adds	x23, x23, x25
	umulh	x26, x8, x10
	adcs	x24, x24, x26
	#  A[1] * A[3]
	mul	x25, x9, x11
	adcs	x4, x4, x25
	umulh	x5, x9, x11
	adc	x5, x5, xzr
	#  A[1] * A[2]
	mul	x25, x9, x10
	adds	x24, x24, x25
	umulh	x26, x9, x10
	adcs	x4, x4, x26
	#  A[2] * A[3]
	mul	x25, x10, x11
	adcs	x5, x5, x25
	umulh	x6, x10, x11
	adc	x6, x6, xzr
	# Double
	adds	x22, x22, x22
	adcs	x23, x23, x23
	adcs	x24, x24, x24
	adcs	x4, x4, x4
	adcs	x5, x5, x5
	adcs	x6, x6, x6
	adc	x7, xzr, xzr
	#  A[0] * A[0]
	umulh	x26, x8, x8
	mul	x21, x8, x8
	#  A[1] * A[1]
	mul	x25, x9, x9
	adds	x22, x22, x26
	umulh	x26, x9, x9
	adcs	x23, x23, x25
	#  A[2] * A[2]
	mul	x25, x10, x10
	adcs	x24, x24, x26
	umulh	x26, x10, x10
	adcs	x4, x4, x25
	#  A[3] * A[3]
	mul	x25, x11, x11
	adcs	x5, x5, x26
	umulh	x26, x11, x11
	adcs	x6, x6, x25
	adc	x7, x7, x26
	# Reduce
	mov	x25, #38
	mul	x26, x25, x7
	adds	x24, x24, x26
	umulh	x27, x25, x7
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x24, #63
	mul	x27, x27, x25
	and	x24, x24, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x4
	adds	x21, x21, x26
	umulh	x4, x25, x4
	mul	x26, x25, x5
	adcs	x22, x22, x26
	umulh	x5, x25, x5
	mul	x26, x25, x6
	adcs	x23, x23, x26
	umulh	x6, x25, x6
	adc	x24, x24, xzr
	#  Add high product results in
	adds	x21, x21, x27
	adcs	x22, x22, x4
	adcs	x23, x23, x5
	adc	x24, x24, x6
	# The subtrahend is read back from out + 0x20, written in step 3
	add	x2, x0, #32
	# Sub
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	subs	x21, x21, x8
	sbcs	x22, x22, x9
	sbcs	x23, x23, x10
	sbcs	x24, x24, x11
	csetm	x28, cc
	mov	x25, #-19
	#   Mask the modulus
	extr	x28, x28, x24, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x21, x21, x25
	sbcs	x22, x22, xzr
	and	x24, x24, #0x7fffffffffffffff
	sbcs	x23, x23, xzr
	sbc	x24, x24, xzr
	stp	x21, x22, [x0]
	stp	x23, x24, [x0, #16]
	# Step 5: out[0x60] = 2 * c^2 - (b^2 - a^2)
	# x2 = in + 0x40, x0 = out + 0x60; x12-x15 still hold b^2 - a^2
	ldr	x2, [x29, #24]
	add	x2, x2, #0x40
	add	x0, x0, #0x60
	# Square * 2
	ldp	x16, x17, [x2]
	ldp	x19, x20, [x2, #16]
	#  A[0] * A[1]
	umulh	x6, x16, x17
	mul	x5, x16, x17
	#  A[0] * A[3]
	umulh	x8, x16, x20
	mul	x7, x16, x20
	#  A[0] * A[2]
	mul	x25, x16, x19
	adds	x6, x6, x25
	umulh	x26, x16, x19
	adcs	x7, x7, x26
	#  A[1] * A[3]
	mul	x25, x17, x20
	adcs	x8, x8, x25
	umulh	x9, x17, x20
	adc	x9, x9, xzr
	#  A[1] * A[2]
	mul	x25, x17, x19
	adds	x7, x7, x25
	umulh	x26, x17, x19
	adcs	x8, x8, x26
	#  A[2] * A[3]
	mul	x25, x19, x20
	adcs	x9, x9, x25
	umulh	x10, x19, x20
	adc	x10, x10, xzr
	# Double
	adds	x5, x5, x5
	adcs	x6, x6, x6
	adcs	x7, x7, x7
	adcs	x8, x8, x8
	adcs	x9, x9, x9
	adcs	x10, x10, x10
	adc	x11, xzr, xzr
	#  A[0] * A[0]
	umulh	x26, x16, x16
	mul	x4, x16, x16
	#  A[1] * A[1]
	mul	x25, x17, x17
	adds	x5, x5, x26
	umulh	x26, x17, x17
	adcs	x6, x6, x25
	#  A[2] * A[2]
	mul	x25, x19, x19
	adcs	x7, x7, x26
	umulh	x26, x19, x19
	adcs	x8, x8, x25
	#  A[3] * A[3]
	mul	x25, x20, x20
	adcs	x9, x9, x26
	umulh	x26, x20, x20
	adcs	x10, x10, x25
	adc	x11, x11, x26
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x7, x7, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x7, #63
	mul	x27, x27, x25
	and	x7, x7, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x4, x4, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x5, x5, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x6, x6, x26
	umulh	x10, x25, x10
	adc	x7, x7, xzr
	#  Add high product results in
	adds	x4, x4, x27
	adcs	x5, x5, x8
	adcs	x6, x6, x9
	adc	x7, x7, x10
	# Times 2: shift x4-x7 left by one bit; the two bits shifted out of
	# the 255-bit range (x7 >> 62) are multiplied by 19 and added back
	mov	x25, #19
	lsr	x26, x7, #62
	extr	x7, x7, x6, #63
	extr	x6, x6, x5, #63
	extr	x5, x5, x4, #63
	lsl	x4, x4, #1
	mul	x26, x26, x25
	adds	x4, x4, x26
	adcs	x5, x5, xzr
	and	x7, x7, #0x7fffffffffffffff
	adcs	x6, x6, xzr
	adc	x7, x7, xzr
	# Store
	sub	x1, x0, #32
	# Sub
	subs	x4, x4, x12
	sbcs	x5, x5, x13
	sbcs	x6, x6, x14
	sbcs	x7, x7, x15
	csetm	x28, cc
	mov	x25, #-19
	#   Mask the modulus
	extr	x28, x28, x7, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x4, x4, x25
	sbcs	x5, x5, xzr
	and	x7, x7, #0x7fffffffffffffff
	sbcs	x6, x6, xzr
	sbc	x7, x7, xzr
	stp	x4, x5, [x0]
	stp	x6, x7, [x0, #16]
	# Restore the registers saved in the prologue and release the frame
	ldp	x17, x19, [x29, #40]
	ldp	x20, x21, [x29, #56]
	ldp	x22, x23, [x29, #72]
	ldp	x24, x25, [x29, #88]
	ldp	x26, x27, [x29, #104]
	ldr	x28, [x29, #120]
	ldp	x29, x30, [sp], #0x80
	ret
#ifndef __APPLE__
	.size	ge_p2_dbl,.-ge_p2_dbl
#endif /* __APPLE__ */
/* ge_madd
 *
 * Works on 256-bit values held as 4 x 64-bit limbs. With
 *   a = (p[0x20] + p[0x00]) * q[0x00]
 *   b = (p[0x20] - p[0x00]) * q[0x20]
 *   c = p[0x60] * q[0x40]
 *   d = 2 * p[0x40]
 * the function stores:
 *   out[0x00] = a - b
 *   out[0x20] = a + b
 *   out[0x40] = d + c
 *   out[0x60] = d - c
 * Products are folded back to 4 limbs with the constants 38 and 19
 * (2^256 = 38 and 2^255 = 19 modulo 2^255 - 19). Sums and differences are
 * corrected by a multiple of 19 derived from the carry/borrow and bit 255.
 *
 * NOTE(review): p is presumably an extended point (X, Y, Z, T at offsets
 * 0x00/0x20/0x40/0x60) and q a precomputed point with three field elements
 * - confirm against the C declarations.
 *
 * x0: out - destination, four 32-byte slots are written.
 * x1: p   - first source, four 32-byte slots are read.
 * x2: q   - second source, three 32-byte slots are read.
 *
 * x17 and x19-x28 are saved in the 144-byte frame and restored before
 * returning. x0-x16 are modified and not restored.
 */
#ifndef __APPLE__
.text
.globl	ge_madd
.type	ge_madd,@function
.align	2
ge_madd:
#else
.section	__TEXT,__text
.globl	_ge_madd
.p2align	2
_ge_madd:
#endif /* __APPLE__ */
	# Frame: x29/x30 at #0, x0/x1/x2 at #16/#24/#32, saved registers from #56
	stp	x29, x30, [sp, #-144]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #56]
	stp	x20, x21, [x29, #72]
	stp	x22, x23, [x29, #88]
	stp	x24, x25, [x29, #104]
	stp	x26, x27, [x29, #120]
	str	x28, [x29, #136]
	str	x0, [x29, #16]
	# The p and q pointers are reloaded from the frame further down
	str	x1, [x29, #24]
	str	x2, [x29, #32]
	# Step 1: x16, x17, x19, x20 = p[0x20] + p[0x00]
	#         x12-x15            = p[0x20] - p[0x00]
	mov	x3, x1
	add	x2, x1, #32
	add	x1, x0, #32
	# Add
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	ldp	x4, x5, [x3]
	ldp	x6, x7, [x3, #16]
	adds	x16, x8, x4
	adcs	x17, x9, x5
	adcs	x19, x10, x6
	adcs	x20, x11, x7
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x20, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x16, x16, x25
	adcs	x17, x17, xzr
	and	x20, x20, #0x7fffffffffffffff
	adcs	x19, x19, xzr
	adc	x20, x20, xzr
	# Sub
	subs	x12, x8, x4
	sbcs	x13, x9, x5
	sbcs	x14, x10, x6
	sbcs	x15, x11, x7
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	# Step 2: a = (p[0x20] + p[0x00]) * q[0x00] into x21-x24 (x2 = q)
	ldr	x2, [x29, #32]
	mov	x1, x0
	# Multiply
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	# A[0] * B[0]
	umulh	x22, x16, x8
	mul	x21, x16, x8
	# A[2] * B[0]
	umulh	x24, x19, x8
	mul	x23, x19, x8
	# A[1] * B[0]
	mul	x25, x17, x8
	adds	x22, x22, x25
	umulh	x26, x17, x8
	adcs	x23, x23, x26
	# A[1] * B[3]
	umulh	x5, x17, x11
	adc	x24, x24, xzr
	mul	x4, x17, x11
	# A[0] * B[1]
	mul	x25, x16, x9
	adds	x22, x22, x25
	umulh	x26, x16, x9
	adcs	x23, x23, x26
	# A[2] * B[1]
	mul	x25, x19, x9
	adcs	x24, x24, x25
	umulh	x26, x19, x9
	adcs	x4, x4, x26
	adc	x5, x5, xzr
	# A[1] * B[2]
	mul	x25, x17, x10
	adds	x24, x24, x25
	umulh	x26, x17, x10
	adcs	x4, x4, x26
	adcs	x5, x5, xzr
	adc	x6, xzr, xzr
	# A[0] * B[2]
	mul	x25, x16, x10
	adds	x23, x23, x25
	umulh	x26, x16, x10
	adcs	x24, x24, x26
	adcs	x4, x4, xzr
	adcs	x5, x5, xzr
	adc	x6, x6, xzr
	# A[1] * B[1]
	mul	x25, x17, x9
	adds	x23, x23, x25
	umulh	x26, x17, x9
	adcs	x24, x24, x26
	# A[3] * B[1]
	mul	x25, x20, x9
	adcs	x4, x4, x25
	umulh	x26, x20, x9
	adcs	x5, x5, x26
	adc	x6, x6, xzr
	# A[2] * B[2]
	mul	x25, x19, x10
	adds	x4, x4, x25
	umulh	x26, x19, x10
	adcs	x5, x5, x26
	# A[3] * B[3]
	mul	x25, x20, x11
	adcs	x6, x6, x25
	umulh	x7, x20, x11
	adc	x7, x7, xzr
	# A[0] * B[3]
	mul	x25, x16, x11
	adds	x24, x24, x25
	umulh	x26, x16, x11
	adcs	x4, x4, x26
	# A[2] * B[3]
	mul	x25, x19, x11
	adcs	x5, x5, x25
	umulh	x26, x19, x11
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	# A[3] * B[0]
	mul	x25, x20, x8
	adds	x24, x24, x25
	umulh	x26, x20, x8
	adcs	x4, x4, x26
	# A[3] * B[2]
	mul	x25, x20, x10
	adcs	x5, x5, x25
	umulh	x26, x20, x10
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	# Reduce
	# High limbs x4-x7 are folded into x21-x24: top limb times 38 first,
	# then bit 255 and above times 19, then the other high limbs times 38
	mov	x25, #38
	mul	x26, x25, x7
	adds	x24, x24, x26
	umulh	x27, x25, x7
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x24, #63
	mul	x27, x27, x25
	and	x24, x24, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x4
	adds	x21, x21, x26
	umulh	x4, x25, x4
	mul	x26, x25, x5
	adcs	x22, x22, x26
	umulh	x5, x25, x5
	mul	x26, x25, x6
	adcs	x23, x23, x26
	umulh	x6, x25, x6
	adc	x24, x24, xzr
	#  Add high product results in
	adds	x21, x21, x27
	adcs	x22, x22, x4
	adcs	x23, x23, x5
	adc	x24, x24, x6
	# Step 3: b = (p[0x20] - p[0x00]) * q[0x20] into x4-x7
	# x2 = q + 0x20, x0 = out + 0x20
	add	x2, x2, #32
	add	x1, x0, #32
	add	x0, x0, #32
	# Multiply
	ldp	x16, x17, [x2]
	ldp	x19, x20, [x2, #16]
	# A[0] * B[0]
	umulh	x5, x12, x16
	mul	x4, x12, x16
	# A[2] * B[0]
	umulh	x7, x14, x16
	mul	x6, x14, x16
	# A[1] * B[0]
	mul	x25, x13, x16
	adds	x5, x5, x25
	umulh	x26, x13, x16
	adcs	x6, x6, x26
	# A[1] * B[3]
	umulh	x9, x13, x20
	adc	x7, x7, xzr
	mul	x8, x13, x20
	# A[0] * B[1]
	mul	x25, x12, x17
	adds	x5, x5, x25
	umulh	x26, x12, x17
	adcs	x6, x6, x26
	# A[2] * B[1]
	mul	x25, x14, x17
	adcs	x7, x7, x25
	umulh	x26, x14, x17
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x13, x19
	adds	x7, x7, x25
	umulh	x26, x13, x19
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x12, x19
	adds	x6, x6, x25
	umulh	x26, x12, x19
	adcs	x7, x7, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x13, x17
	adds	x6, x6, x25
	umulh	x26, x13, x17
	adcs	x7, x7, x26
	# A[3] * B[1]
	mul	x25, x15, x17
	adcs	x8, x8, x25
	umulh	x26, x15, x17
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x14, x19
	adds	x8, x8, x25
	umulh	x26, x14, x19
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x15, x20
	adcs	x10, x10, x25
	umulh	x11, x15, x20
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x12, x20
	adds	x7, x7, x25
	umulh	x26, x12, x20
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x14, x20
	adcs	x9, x9, x25
	umulh	x26, x14, x20
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x15, x16
	adds	x7, x7, x25
	umulh	x26, x15, x16
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x15, x19
	adcs	x9, x9, x25
	umulh	x26, x15, x19
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x7, x7, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x7, #63
	mul	x27, x27, x25
	and	x7, x7, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x4, x4, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x5, x5, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x6, x6, x26
	umulh	x10, x25, x10
	adc	x7, x7, xzr
	#  Add high product results in
	adds	x4, x4, x27
	adcs	x5, x5, x8
	adcs	x6, x6, x9
	adc	x7, x7, x10
	# Step 4: out[0x20] = a + b (x8-x11), out[0x00] = a - b (x12-x15)
	# x0 = out + 0x20, x1 = out
	mov	x3, x0
	sub	x2, x0, #32
	sub	x1, x0, #32
	# Add
	adds	x8, x21, x4
	adcs	x9, x22, x5
	adcs	x10, x23, x6
	adcs	x11, x24, x7
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x11, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x8, x8, x25
	adcs	x9, x9, xzr
	and	x11, x11, #0x7fffffffffffffff
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	# Sub
	subs	x12, x21, x4
	sbcs	x13, x22, x5
	sbcs	x14, x23, x6
	sbcs	x15, x24, x7
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	stp	x8, x9, [x0]
	stp	x10, x11, [x0, #16]
	stp	x12, x13, [x1]
	stp	x14, x15, [x1, #16]
	# Step 5: c = p[0x60] * q[0x40] into x16, x17, x19, x20
	# x1 = p + 0x60, x2 = q + 0x40, x0 = out + 0x60
	ldr	x1, [x29, #24]
	ldr	x2, [x29, #32]
	add	x2, x2, #0x40
	add	x1, x1, #0x60
	add	x0, x0, #0x40
	# Multiply
	ldp	x21, x22, [x1]
	ldp	x23, x24, [x1, #16]
	ldp	x4, x5, [x2]
	ldp	x6, x7, [x2, #16]
	# A[0] * B[0]
	umulh	x17, x21, x4
	mul	x16, x21, x4
	# A[2] * B[0]
	umulh	x20, x23, x4
	mul	x19, x23, x4
	# A[1] * B[0]
	mul	x25, x22, x4
	adds	x17, x17, x25
	umulh	x26, x22, x4
	adcs	x19, x19, x26
	# A[1] * B[3]
	umulh	x9, x22, x7
	adc	x20, x20, xzr
	mul	x8, x22, x7
	# A[0] * B[1]
	mul	x25, x21, x5
	adds	x17, x17, x25
	umulh	x26, x21, x5
	adcs	x19, x19, x26
	# A[2] * B[1]
	mul	x25, x23, x5
	adcs	x20, x20, x25
	umulh	x26, x23, x5
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x22, x6
	adds	x20, x20, x25
	umulh	x26, x22, x6
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x21, x6
	adds	x19, x19, x25
	umulh	x26, x21, x6
	adcs	x20, x20, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x22, x5
	adds	x19, x19, x25
	umulh	x26, x22, x5
	adcs	x20, x20, x26
	# A[3] * B[1]
	mul	x25, x24, x5
	adcs	x8, x8, x25
	umulh	x26, x24, x5
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x23, x6
	adds	x8, x8, x25
	umulh	x26, x23, x6
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x24, x7
	adcs	x10, x10, x25
	umulh	x11, x24, x7
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x21, x7
	adds	x20, x20, x25
	umulh	x26, x21, x7
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x23, x7
	adcs	x9, x9, x25
	umulh	x26, x23, x7
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x24, x4
	adds	x20, x20, x25
	umulh	x26, x24, x4
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x24, x6
	adcs	x9, x9, x25
	umulh	x26, x24, x6
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x20, x20, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x20, #63
	mul	x27, x27, x25
	and	x20, x20, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x16, x16, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x17, x17, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x19, x19, x26
	umulh	x10, x25, x10
	adc	x20, x20, xzr
	#  Add high product results in
	adds	x16, x16, x27
	adcs	x17, x17, x8
	adcs	x19, x19, x9
	adc	x20, x20, x10
	# Step 6: d = 2 * p[0x40] into x12-x15 (x1 = p + 0x40)
	sub	x1, x1, #32
	# Double
	ldp	x12, x13, [x1]
	ldp	x14, x15, [x1, #16]
	adds	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adc	x15, x15, x15
	# Here x28 is all ones when bit 255 of the doubled value is set, else zero
	mov	x25, #-19
	asr	x28, x15, #63
	#   Mask the modulus
	and	x25, x28, x25
	and	x26, x28, #0x7fffffffffffffff
	#   Sub modulus (if overflow)
	subs	x12, x12, x25
	sbcs	x13, x13, x28
	sbcs	x14, x14, x28
	sbc	x15, x15, x26
	# Step 7: out[0x40] = d + c (x8-x11), out[0x60] = d - c (x4-x7)
	# x1 = out + 0x60, x0 = out + 0x40
	mov	x3, x0
	sub	x2, x0, #32
	mov	x1, x0
	sub	x0, x0, #32
	# Add
	adds	x8, x12, x16
	adcs	x9, x13, x17
	adcs	x10, x14, x19
	adcs	x11, x15, x20
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x11, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x8, x8, x25
	adcs	x9, x9, xzr
	and	x11, x11, #0x7fffffffffffffff
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	# Sub
	subs	x4, x12, x16
	sbcs	x5, x13, x17
	sbcs	x6, x14, x19
	sbcs	x7, x15, x20
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x7, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x4, x4, x25
	sbcs	x5, x5, xzr
	and	x7, x7, #0x7fffffffffffffff
	sbcs	x6, x6, xzr
	sbc	x7, x7, xzr
	stp	x8, x9, [x0]
	stp	x10, x11, [x0, #16]
	stp	x4, x5, [x1]
	stp	x6, x7, [x1, #16]
	# Restore the registers saved in the prologue and release the frame
	ldp	x17, x19, [x29, #56]
	ldp	x20, x21, [x29, #72]
	ldp	x22, x23, [x29, #88]
	ldp	x24, x25, [x29, #104]
	ldp	x26, x27, [x29, #120]
	ldr	x28, [x29, #136]
	ldp	x29, x30, [sp], #0x90
	ret
#ifndef __APPLE__
	.size	ge_madd,.-ge_madd
#endif /* __APPLE__ */
/* ge_msub
 *
 * Straight-line routine taking three pointers in x0, x1 and x2. Every value
 * it touches is a 256-bit number held as four 64-bit limbs (32 bytes), and
 * every operation folds bits at and above 2^255 back in multiplied by 19
 * (2^256 is folded as 38), i.e. arithmetic modulo 2^255 - 19.
 *
 *   x0 - output; 32-byte values are written at byte offsets 0, 32, 64, 96.
 *   x1 - input;  32-byte values are read at byte offsets 0, 32, 64, 96.
 *   x2 - input;  32-byte values are read at byte offsets 0, 32, 64.
 *
 * Writing out[n], in1[n], in2[n] for the value at byte offset n:
 *   a       = (in1[32] + in1[0]) * in2[32]
 *   b       = (in1[32] - in1[0]) * in2[0]
 *   out[0]  = a - b
 *   out[32] = a + b
 *   c       = in1[96] * in2[64]
 *   d       = 2 * in1[64]
 *   out[64] = d - c
 *   out[96] = d + c
 *
 * NOTE(review): the offsets presumably map to the X/Y/Z/T members of the
 * point structures and to the members of a precomputed point - confirm
 * against the C declarations.
 * NOTE(review): in the Sub steps the correction is formed as -19 * x28 in a
 * single 64-bit register and subtracted with zero borrow limbs. This looks
 * like it relies on "no borrow and bit 255 of the difference set" never
 * occurring (minuend below 2^255) - confirm the input range guarantees.
 *
 * Uses a 144-byte frame. x17 and x19-x28 are saved on entry and restored on
 * exit; x3-x16 are overwritten as scratch.
 */
#ifndef __APPLE__
.text
.globl	ge_msub
.type	ge_msub,@function
.align	2
ge_msub:
#else
.section	__TEXT,__text
.globl	_ge_msub
.p2align	2
_ge_msub:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-144]!
	add	x29, sp, #0
	/* Save x17 and x19-x28 in the frame; restored before ret. */
	stp	x17, x19, [x29, #56]
	stp	x20, x21, [x29, #72]
	stp	x22, x23, [x29, #88]
	stp	x24, x25, [x29, #104]
	stp	x26, x27, [x29, #120]
	str	x28, [x29, #136]
	/* Spill the three pointer arguments so they can be reloaded later. */
	str	x0, [x29, #16]
	str	x1, [x29, #24]
	str	x2, [x29, #32]
	/* x3 = in1 + 0, x2 = in1 + 32, x1 = out + 32 */
	mov	x3, x1
	add	x2, x1, #32
	add	x1, x0, #32
	/* x16,x17,x19,x20 = in1[32] + in1[0] */
	# Add
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	ldp	x4, x5, [x3]
	ldp	x6, x7, [x3, #16]
	adds	x16, x8, x4
	adcs	x17, x9, x5
	adcs	x19, x10, x6
	adcs	x20, x11, x7
	/* x28 = (carry out : bit 63 of top limb) = sum bits at and above 2^255;
	 * 19 * x28 is added back and bit 255 is cleared. */
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x20, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x16, x16, x25
	adcs	x17, x17, xzr
	and	x20, x20, #0x7fffffffffffffff
	adcs	x19, x19, xzr
	adc	x20, x20, xzr
	/* x12..x15 = in1[32] - in1[0] */
	# Sub
	subs	x12, x8, x4
	sbcs	x13, x9, x5
	sbcs	x14, x10, x6
	sbcs	x15, x11, x7
	/* x28 = (borrow mask : bit 63 of top limb); x25 = -19 * x28 is
	 * subtracted and bit 255 is cleared. */
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	/* x2 = in2 + 32, x1 = out + 0 */
	ldr	x2, [x29, #32]
	add	x2, x2, #32
	mov	x1, x0
	/* a: 512-bit product of (x16,x17,x19,x20) and in2[32];
	 * low half in x21..x24, high half in x4..x7. */
	# Multiply
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	# A[0] * B[0]
	umulh	x22, x16, x8
	mul	x21, x16, x8
	# A[2] * B[0]
	umulh	x24, x19, x8
	mul	x23, x19, x8
	# A[1] * B[0]
	mul	x25, x17, x8
	adds	x22, x22, x25
	umulh	x26, x17, x8
	adcs	x23, x23, x26
	# A[1] * B[3]
	umulh	x5, x17, x11
	adc	x24, x24, xzr
	mul	x4, x17, x11
	# A[0] * B[1]
	mul	x25, x16, x9
	adds	x22, x22, x25
	umulh	x26, x16, x9
	adcs	x23, x23, x26
	# A[2] * B[1]
	mul	x25, x19, x9
	adcs	x24, x24, x25
	umulh	x26, x19, x9
	adcs	x4, x4, x26
	adc	x5, x5, xzr
	# A[1] * B[2]
	mul	x25, x17, x10
	adds	x24, x24, x25
	umulh	x26, x17, x10
	adcs	x4, x4, x26
	adcs	x5, x5, xzr
	adc	x6, xzr, xzr
	# A[0] * B[2]
	mul	x25, x16, x10
	adds	x23, x23, x25
	umulh	x26, x16, x10
	adcs	x24, x24, x26
	adcs	x4, x4, xzr
	adcs	x5, x5, xzr
	adc	x6, x6, xzr
	# A[1] * B[1]
	mul	x25, x17, x9
	adds	x23, x23, x25
	umulh	x26, x17, x9
	adcs	x24, x24, x26
	# A[3] * B[1]
	mul	x25, x20, x9
	adcs	x4, x4, x25
	umulh	x26, x20, x9
	adcs	x5, x5, x26
	adc	x6, x6, xzr
	# A[2] * B[2]
	mul	x25, x19, x10
	adds	x4, x4, x25
	umulh	x26, x19, x10
	adcs	x5, x5, x26
	# A[3] * B[3]
	mul	x25, x20, x11
	adcs	x6, x6, x25
	umulh	x7, x20, x11
	adc	x7, x7, xzr
	# A[0] * B[3]
	mul	x25, x16, x11
	adds	x24, x24, x25
	umulh	x26, x16, x11
	adcs	x4, x4, x26
	# A[2] * B[3]
	mul	x25, x19, x11
	adcs	x5, x5, x25
	umulh	x26, x19, x11
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	# A[3] * B[0]
	mul	x25, x20, x8
	adds	x24, x24, x25
	umulh	x26, x20, x8
	adcs	x4, x4, x26
	# A[3] * B[2]
	mul	x25, x20, x10
	adcs	x5, x5, x25
	umulh	x26, x20, x10
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	/* Fold the high half into the low half: the top limb times 38 goes
	 * into limb 3 first, the bits of that sum at and above 2^255 become
	 * x27 * 19, then the remaining high limbs times 38 are added in. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x7
	adds	x24, x24, x26
	umulh	x27, x25, x7
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x24, #63
	mul	x27, x27, x25
	and	x24, x24, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x4
	adds	x21, x21, x26
	umulh	x4, x25, x4
	mul	x26, x25, x5
	adcs	x22, x22, x26
	umulh	x5, x25, x5
	mul	x26, x25, x6
	adcs	x23, x23, x26
	umulh	x6, x25, x6
	adc	x24, x24, xzr
	#  Add high product results in
	adds	x21, x21, x27
	adcs	x22, x22, x4
	adcs	x23, x23, x5
	adc	x24, x24, x6
	/* x2 = in2 + 0, x0 = x1 = out + 32 */
	sub	x2, x2, #32
	add	x1, x0, #32
	add	x0, x0, #32
	/* b: product of (x12..x15) and in2[0];
	 * low half in x4..x7, high half in x8..x11. */
	# Multiply
	ldp	x16, x17, [x2]
	ldp	x19, x20, [x2, #16]
	# A[0] * B[0]
	umulh	x5, x12, x16
	mul	x4, x12, x16
	# A[2] * B[0]
	umulh	x7, x14, x16
	mul	x6, x14, x16
	# A[1] * B[0]
	mul	x25, x13, x16
	adds	x5, x5, x25
	umulh	x26, x13, x16
	adcs	x6, x6, x26
	# A[1] * B[3]
	umulh	x9, x13, x20
	adc	x7, x7, xzr
	mul	x8, x13, x20
	# A[0] * B[1]
	mul	x25, x12, x17
	adds	x5, x5, x25
	umulh	x26, x12, x17
	adcs	x6, x6, x26
	# A[2] * B[1]
	mul	x25, x14, x17
	adcs	x7, x7, x25
	umulh	x26, x14, x17
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x13, x19
	adds	x7, x7, x25
	umulh	x26, x13, x19
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x12, x19
	adds	x6, x6, x25
	umulh	x26, x12, x19
	adcs	x7, x7, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x13, x17
	adds	x6, x6, x25
	umulh	x26, x13, x17
	adcs	x7, x7, x26
	# A[3] * B[1]
	mul	x25, x15, x17
	adcs	x8, x8, x25
	umulh	x26, x15, x17
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x14, x19
	adds	x8, x8, x25
	umulh	x26, x14, x19
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x15, x20
	adcs	x10, x10, x25
	umulh	x11, x15, x20
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x12, x20
	adds	x7, x7, x25
	umulh	x26, x12, x20
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x14, x20
	adcs	x9, x9, x25
	umulh	x26, x14, x20
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x15, x16
	adds	x7, x7, x25
	umulh	x26, x15, x16
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x15, x19
	adcs	x9, x9, x25
	umulh	x26, x15, x19
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	/* Same fold as for the first product, on x4..x7 / x8..x11. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x7, x7, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x7, #63
	mul	x27, x27, x25
	and	x7, x7, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x4, x4, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x5, x5, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x6, x6, x26
	umulh	x10, x25, x10
	adc	x7, x7, xzr
	#  Add high product results in
	adds	x4, x4, x27
	adcs	x5, x5, x8
	adcs	x6, x6, x9
	adc	x7, x7, x10
	/* x0 = out + 32, x1 = out + 0 */
	mov	x3, x0
	sub	x2, x0, #32
	sub	x1, x0, #32
	/* x8..x11 = a + b, stored to out[32] below */
	# Add
	adds	x8, x21, x4
	adcs	x9, x22, x5
	adcs	x10, x23, x6
	adcs	x11, x24, x7
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x11, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x8, x8, x25
	adcs	x9, x9, xzr
	and	x11, x11, #0x7fffffffffffffff
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	/* x12..x15 = a - b, stored to out[0] below */
	# Sub
	subs	x12, x21, x4
	sbcs	x13, x22, x5
	sbcs	x14, x23, x6
	sbcs	x15, x24, x7
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	stp	x8, x9, [x0]
	stp	x10, x11, [x0, #16]
	stp	x12, x13, [x1]
	stp	x14, x15, [x1, #16]
	/* x1 = in1 + 96, x2 = in2 + 64, x0 = out + 96 */
	ldr	x1, [x29, #24]
	ldr	x2, [x29, #32]
	add	x2, x2, #0x40
	add	x1, x1, #0x60
	add	x0, x0, #0x40
	/* c: product of in1[96] and in2[64];
	 * low half in x16,x17,x19,x20, high half in x8..x11. */
	# Multiply
	ldp	x21, x22, [x1]
	ldp	x23, x24, [x1, #16]
	ldp	x4, x5, [x2]
	ldp	x6, x7, [x2, #16]
	# A[0] * B[0]
	umulh	x17, x21, x4
	mul	x16, x21, x4
	# A[2] * B[0]
	umulh	x20, x23, x4
	mul	x19, x23, x4
	# A[1] * B[0]
	mul	x25, x22, x4
	adds	x17, x17, x25
	umulh	x26, x22, x4
	adcs	x19, x19, x26
	# A[1] * B[3]
	umulh	x9, x22, x7
	adc	x20, x20, xzr
	mul	x8, x22, x7
	# A[0] * B[1]
	mul	x25, x21, x5
	adds	x17, x17, x25
	umulh	x26, x21, x5
	adcs	x19, x19, x26
	# A[2] * B[1]
	mul	x25, x23, x5
	adcs	x20, x20, x25
	umulh	x26, x23, x5
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x22, x6
	adds	x20, x20, x25
	umulh	x26, x22, x6
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x21, x6
	adds	x19, x19, x25
	umulh	x26, x21, x6
	adcs	x20, x20, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x22, x5
	adds	x19, x19, x25
	umulh	x26, x22, x5
	adcs	x20, x20, x26
	# A[3] * B[1]
	mul	x25, x24, x5
	adcs	x8, x8, x25
	umulh	x26, x24, x5
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x23, x6
	adds	x8, x8, x25
	umulh	x26, x23, x6
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x24, x7
	adcs	x10, x10, x25
	umulh	x11, x24, x7
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x21, x7
	adds	x20, x20, x25
	umulh	x26, x21, x7
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x23, x7
	adcs	x9, x9, x25
	umulh	x26, x23, x7
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x24, x4
	adds	x20, x20, x25
	umulh	x26, x24, x4
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x24, x6
	adcs	x9, x9, x25
	umulh	x26, x24, x6
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	/* Same fold as above, on x16,x17,x19,x20 / x8..x11. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x20, x20, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x20, #63
	mul	x27, x27, x25
	and	x20, x20, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x16, x16, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x17, x17, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x19, x19, x26
	umulh	x10, x25, x10
	adc	x20, x20, xzr
	#  Add high product results in
	adds	x16, x16, x27
	adcs	x17, x17, x8
	adcs	x19, x19, x9
	adc	x20, x20, x10
	/* x1 = in1 + 64 */
	sub	x1, x1, #32
	/* d: x12..x15 = 2 * in1[64]. x28 is all ones when bit 255 of the
	 * doubled value is set; in that case 2^255 - 19 is subtracted (limbs
	 * x25, x28, x28, x26). */
	# Double
	ldp	x12, x13, [x1]
	ldp	x14, x15, [x1, #16]
	adds	x12, x12, x12
	adcs	x13, x13, x13
	adcs	x14, x14, x14
	adc	x15, x15, x15
	mov	x25, #-19
	asr	x28, x15, #63
	#   Mask the modulus
	and	x25, x28, x25
	and	x26, x28, #0x7fffffffffffffff
	#   Sub modulus (if overflow)
	subs	x12, x12, x25
	sbcs	x13, x13, x28
	sbcs	x14, x14, x28
	sbc	x15, x15, x26
	/* x0 = out + 96, x1 = out + 64 */
	mov	x3, x0
	sub	x2, x0, #32
	sub	x1, x0, #32
	/* x8..x11 = d + c, stored to out[96] below */
	# Add
	adds	x8, x12, x16
	adcs	x9, x13, x17
	adcs	x10, x14, x19
	adcs	x11, x15, x20
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x11, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x8, x8, x25
	adcs	x9, x9, xzr
	and	x11, x11, #0x7fffffffffffffff
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	/* x4..x7 = d - c, stored to out[64] below */
	# Sub
	subs	x4, x12, x16
	sbcs	x5, x13, x17
	sbcs	x6, x14, x19
	sbcs	x7, x15, x20
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x7, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x4, x4, x25
	sbcs	x5, x5, xzr
	and	x7, x7, #0x7fffffffffffffff
	sbcs	x6, x6, xzr
	sbc	x7, x7, xzr
	stp	x8, x9, [x0]
	stp	x10, x11, [x0, #16]
	stp	x4, x5, [x1]
	stp	x6, x7, [x1, #16]
	/* Restore the saved registers and release the 144-byte frame. */
	ldp	x17, x19, [x29, #56]
	ldp	x20, x21, [x29, #72]
	ldp	x22, x23, [x29, #88]
	ldp	x24, x25, [x29, #104]
	ldp	x26, x27, [x29, #120]
	ldr	x28, [x29, #136]
	ldp	x29, x30, [sp], #0x90
	ret
#ifndef __APPLE__
	.size	ge_msub,.-ge_msub
#endif /* __APPLE__ */
/* ge_add
 *
 * Straight-line routine taking three pointers in x0, x1 and x2. Every value
 * it touches is a 256-bit number held as four 64-bit limbs (32 bytes), and
 * every operation folds bits at and above 2^255 back in multiplied by 19
 * (2^256 is folded as 38), i.e. arithmetic modulo 2^255 - 19.
 *
 *   x0 - output; 32-byte values are written at byte offsets 0, 32, 64, 96.
 *   x1 - input;  32-byte values are read at byte offsets 0, 32, 64, 96.
 *   x2 - input;  32-byte values are read at byte offsets 0, 32, 64, 96.
 *
 * Writing out[n], in1[n], in2[n] for the value at byte offset n:
 *   a       = (in1[32] + in1[0]) * in2[0]
 *   b       = (in1[32] - in1[0]) * in2[32]
 *   out[0]  = a - b
 *   out[32] = a + b
 *   c       = in1[96] * in2[96]
 *   d       = 2 * in1[64] * in2[64]
 *   out[64] = d + c
 *   out[96] = d - c
 *
 * The output area is also used for intermediates: a, b and c are stored to
 * out[0], out[32] and out[96] before the final values overwrite them, and c
 * is reloaded from out[96].
 *
 * NOTE(review): the offsets presumably map to the X/Y/Z/T members of the
 * point structures and to the members of a cached point - confirm against
 * the C declarations.
 * NOTE(review): in the Sub steps the correction is formed as -19 * x28 in a
 * single 64-bit register and subtracted with zero borrow limbs. This looks
 * like it relies on "no borrow and bit 255 of the difference set" never
 * occurring (minuend below 2^255) - confirm the input range guarantees.
 *
 * Uses a 144-byte frame. x17 and x19-x28 are saved on entry and restored on
 * exit; x3-x16 are overwritten as scratch.
 */
#ifndef __APPLE__
.text
.globl	ge_add
.type	ge_add,@function
.align	2
ge_add:
#else
.section	__TEXT,__text
.globl	_ge_add
.p2align	2
_ge_add:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-144]!
	add	x29, sp, #0
	/* Save x17 and x19-x28 in the frame; restored before ret. */
	stp	x17, x19, [x29, #56]
	stp	x20, x21, [x29, #72]
	stp	x22, x23, [x29, #88]
	stp	x24, x25, [x29, #104]
	stp	x26, x27, [x29, #120]
	str	x28, [x29, #136]
	/* Spill the three pointer arguments so they can be reloaded later. */
	str	x0, [x29, #16]
	str	x1, [x29, #24]
	str	x2, [x29, #32]
	/* x3 = in1 + 0, x2 = in1 + 32, x1 = out + 32 */
	mov	x3, x1
	add	x2, x1, #32
	add	x1, x0, #32
	/* x16,x17,x19,x20 = in1[32] + in1[0] */
	# Add
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	ldp	x4, x5, [x3]
	ldp	x6, x7, [x3, #16]
	adds	x16, x8, x4
	adcs	x17, x9, x5
	adcs	x19, x10, x6
	adcs	x20, x11, x7
	/* x28 = (carry out : bit 63 of top limb) = sum bits at and above 2^255;
	 * 19 * x28 is added back and bit 255 is cleared. */
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x20, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x16, x16, x25
	adcs	x17, x17, xzr
	and	x20, x20, #0x7fffffffffffffff
	adcs	x19, x19, xzr
	adc	x20, x20, xzr
	/* x12..x15 = in1[32] - in1[0] */
	# Sub
	subs	x12, x8, x4
	sbcs	x13, x9, x5
	sbcs	x14, x10, x6
	sbcs	x15, x11, x7
	/* x28 = (borrow mask : bit 63 of top limb); x25 = -19 * x28 is
	 * subtracted and bit 255 is cleared. */
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	/* x2 = in2 + 0, x1 = out + 0 */
	ldr	x2, [x29, #32]
	mov	x1, x0
	/* a: 512-bit product of (x16,x17,x19,x20) and in2[0];
	 * low half in x21..x24, high half in x4..x7. */
	# Multiply
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	# A[0] * B[0]
	umulh	x22, x16, x8
	mul	x21, x16, x8
	# A[2] * B[0]
	umulh	x24, x19, x8
	mul	x23, x19, x8
	# A[1] * B[0]
	mul	x25, x17, x8
	adds	x22, x22, x25
	umulh	x26, x17, x8
	adcs	x23, x23, x26
	# A[1] * B[3]
	umulh	x5, x17, x11
	adc	x24, x24, xzr
	mul	x4, x17, x11
	# A[0] * B[1]
	mul	x25, x16, x9
	adds	x22, x22, x25
	umulh	x26, x16, x9
	adcs	x23, x23, x26
	# A[2] * B[1]
	mul	x25, x19, x9
	adcs	x24, x24, x25
	umulh	x26, x19, x9
	adcs	x4, x4, x26
	adc	x5, x5, xzr
	# A[1] * B[2]
	mul	x25, x17, x10
	adds	x24, x24, x25
	umulh	x26, x17, x10
	adcs	x4, x4, x26
	adcs	x5, x5, xzr
	adc	x6, xzr, xzr
	# A[0] * B[2]
	mul	x25, x16, x10
	adds	x23, x23, x25
	umulh	x26, x16, x10
	adcs	x24, x24, x26
	adcs	x4, x4, xzr
	adcs	x5, x5, xzr
	adc	x6, x6, xzr
	# A[1] * B[1]
	mul	x25, x17, x9
	adds	x23, x23, x25
	umulh	x26, x17, x9
	adcs	x24, x24, x26
	# A[3] * B[1]
	mul	x25, x20, x9
	adcs	x4, x4, x25
	umulh	x26, x20, x9
	adcs	x5, x5, x26
	adc	x6, x6, xzr
	# A[2] * B[2]
	mul	x25, x19, x10
	adds	x4, x4, x25
	umulh	x26, x19, x10
	adcs	x5, x5, x26
	# A[3] * B[3]
	mul	x25, x20, x11
	adcs	x6, x6, x25
	umulh	x7, x20, x11
	adc	x7, x7, xzr
	# A[0] * B[3]
	mul	x25, x16, x11
	adds	x24, x24, x25
	umulh	x26, x16, x11
	adcs	x4, x4, x26
	# A[2] * B[3]
	mul	x25, x19, x11
	adcs	x5, x5, x25
	umulh	x26, x19, x11
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	# A[3] * B[0]
	mul	x25, x20, x8
	adds	x24, x24, x25
	umulh	x26, x20, x8
	adcs	x4, x4, x26
	# A[3] * B[2]
	mul	x25, x20, x10
	adcs	x5, x5, x25
	umulh	x26, x20, x10
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	/* Fold the high half into the low half: the top limb times 38 goes
	 * into limb 3 first, the bits of that sum at and above 2^255 become
	 * x27 * 19, then the remaining high limbs times 38 are added in. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x7
	adds	x24, x24, x26
	umulh	x27, x25, x7
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x24, #63
	mul	x27, x27, x25
	and	x24, x24, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x4
	adds	x21, x21, x26
	umulh	x4, x25, x4
	mul	x26, x25, x5
	adcs	x22, x22, x26
	umulh	x5, x25, x5
	mul	x26, x25, x6
	adcs	x23, x23, x26
	umulh	x6, x25, x6
	adc	x24, x24, xzr
	#  Add high product results in
	adds	x21, x21, x27
	adcs	x22, x22, x4
	adcs	x23, x23, x5
	adc	x24, x24, x6
	/* Intermediate a goes to out[0]; it also stays live in x21..x24. */
	# Store
	stp	x21, x22, [x0]
	stp	x23, x24, [x0, #16]
	/* x2 = in2 + 32, x0 = x1 = out + 32 */
	add	x2, x2, #32
	add	x1, x0, #32
	add	x0, x0, #32
	/* b: product of (x12..x15) and in2[32];
	 * low half in x4..x7, high half in x8..x11. */
	# Multiply
	ldp	x16, x17, [x2]
	ldp	x19, x20, [x2, #16]
	# A[0] * B[0]
	umulh	x5, x12, x16
	mul	x4, x12, x16
	# A[2] * B[0]
	umulh	x7, x14, x16
	mul	x6, x14, x16
	# A[1] * B[0]
	mul	x25, x13, x16
	adds	x5, x5, x25
	umulh	x26, x13, x16
	adcs	x6, x6, x26
	# A[1] * B[3]
	umulh	x9, x13, x20
	adc	x7, x7, xzr
	mul	x8, x13, x20
	# A[0] * B[1]
	mul	x25, x12, x17
	adds	x5, x5, x25
	umulh	x26, x12, x17
	adcs	x6, x6, x26
	# A[2] * B[1]
	mul	x25, x14, x17
	adcs	x7, x7, x25
	umulh	x26, x14, x17
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x13, x19
	adds	x7, x7, x25
	umulh	x26, x13, x19
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x12, x19
	adds	x6, x6, x25
	umulh	x26, x12, x19
	adcs	x7, x7, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x13, x17
	adds	x6, x6, x25
	umulh	x26, x13, x17
	adcs	x7, x7, x26
	# A[3] * B[1]
	mul	x25, x15, x17
	adcs	x8, x8, x25
	umulh	x26, x15, x17
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x14, x19
	adds	x8, x8, x25
	umulh	x26, x14, x19
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x15, x20
	adcs	x10, x10, x25
	umulh	x11, x15, x20
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x12, x20
	adds	x7, x7, x25
	umulh	x26, x12, x20
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x14, x20
	adcs	x9, x9, x25
	umulh	x26, x14, x20
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x15, x16
	adds	x7, x7, x25
	umulh	x26, x15, x16
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x15, x19
	adcs	x9, x9, x25
	umulh	x26, x15, x19
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	/* Same fold as for the first product, on x4..x7 / x8..x11. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x7, x7, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x7, #63
	mul	x27, x27, x25
	and	x7, x7, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x4, x4, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x5, x5, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x6, x6, x26
	umulh	x10, x25, x10
	adc	x7, x7, xzr
	#  Add high product results in
	adds	x4, x4, x27
	adcs	x5, x5, x8
	adcs	x6, x6, x9
	adc	x7, x7, x10
	/* Intermediate b goes to out[32]; it also stays live in x4..x7. */
	# Store
	stp	x4, x5, [x0]
	stp	x6, x7, [x0, #16]
	/* x0 = out + 32, x1 = out + 0 */
	mov	x3, x0
	sub	x2, x0, #32
	sub	x1, x0, #32
	/* x8..x11 = a + b, stored to out[32] below */
	# Add
	adds	x8, x21, x4
	adcs	x9, x22, x5
	adcs	x10, x23, x6
	adcs	x11, x24, x7
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x11, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x8, x8, x25
	adcs	x9, x9, xzr
	and	x11, x11, #0x7fffffffffffffff
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	/* x12..x15 = a - b, stored to out[0] below */
	# Sub
	subs	x12, x21, x4
	sbcs	x13, x22, x5
	sbcs	x14, x23, x6
	sbcs	x15, x24, x7
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	stp	x8, x9, [x0]
	stp	x10, x11, [x0, #16]
	stp	x12, x13, [x1]
	stp	x14, x15, [x1, #16]
	/* x1 = in1 + 96, x2 = in2 + 96, x0 = out + 96 */
	ldr	x1, [x29, #24]
	ldr	x2, [x29, #32]
	add	x2, x2, #0x60
	add	x1, x1, #0x60
	add	x0, x0, #0x40
	/* c: product of in1[96] and in2[96];
	 * low half in x16,x17,x19,x20, high half in x8..x11. */
	# Multiply
	ldp	x21, x22, [x1]
	ldp	x23, x24, [x1, #16]
	ldp	x4, x5, [x2]
	ldp	x6, x7, [x2, #16]
	# A[0] * B[0]
	umulh	x17, x21, x4
	mul	x16, x21, x4
	# A[2] * B[0]
	umulh	x20, x23, x4
	mul	x19, x23, x4
	# A[1] * B[0]
	mul	x25, x22, x4
	adds	x17, x17, x25
	umulh	x26, x22, x4
	adcs	x19, x19, x26
	# A[1] * B[3]
	umulh	x9, x22, x7
	adc	x20, x20, xzr
	mul	x8, x22, x7
	# A[0] * B[1]
	mul	x25, x21, x5
	adds	x17, x17, x25
	umulh	x26, x21, x5
	adcs	x19, x19, x26
	# A[2] * B[1]
	mul	x25, x23, x5
	adcs	x20, x20, x25
	umulh	x26, x23, x5
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x22, x6
	adds	x20, x20, x25
	umulh	x26, x22, x6
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x21, x6
	adds	x19, x19, x25
	umulh	x26, x21, x6
	adcs	x20, x20, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x22, x5
	adds	x19, x19, x25
	umulh	x26, x22, x5
	adcs	x20, x20, x26
	# A[3] * B[1]
	mul	x25, x24, x5
	adcs	x8, x8, x25
	umulh	x26, x24, x5
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x23, x6
	adds	x8, x8, x25
	umulh	x26, x23, x6
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x24, x7
	adcs	x10, x10, x25
	umulh	x11, x24, x7
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x21, x7
	adds	x20, x20, x25
	umulh	x26, x21, x7
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x23, x7
	adcs	x9, x9, x25
	umulh	x26, x23, x7
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x24, x4
	adds	x20, x20, x25
	umulh	x26, x24, x4
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x24, x6
	adcs	x9, x9, x25
	umulh	x26, x24, x6
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	/* Same fold as above, on x16,x17,x19,x20 / x8..x11. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x20, x20, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x20, #63
	mul	x27, x27, x25
	and	x20, x20, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x16, x16, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x17, x17, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x19, x19, x26
	umulh	x10, x25, x10
	adc	x20, x20, xzr
	#  Add high product results in
	adds	x16, x16, x27
	adcs	x17, x17, x8
	adcs	x19, x19, x9
	adc	x20, x20, x10
	/* Park c in out[96]: x16..x20 are reused by the next multiply and c is
	 * reloaded from memory for the final add/sub. */
	# Store
	stp	x16, x17, [x0]
	stp	x19, x20, [x0, #16]
	/* x3 = in2 + 64, x2 = in1 + 64, x1 = out + 64 */
	sub	x3, x2, #32
	sub	x2, x1, #32
	sub	x1, x0, #32
	/* Product of in1[64] and in2[64];
	 * low half in x8..x11, high half in x16,x17,x19,x20. */
	# Multiply
	ldp	x4, x5, [x2]
	ldp	x6, x7, [x2, #16]
	ldp	x12, x13, [x3]
	ldp	x14, x15, [x3, #16]
	# A[0] * B[0]
	umulh	x9, x4, x12
	mul	x8, x4, x12
	# A[2] * B[0]
	umulh	x11, x6, x12
	mul	x10, x6, x12
	# A[1] * B[0]
	mul	x25, x5, x12
	adds	x9, x9, x25
	umulh	x26, x5, x12
	adcs	x10, x10, x26
	# A[1] * B[3]
	umulh	x17, x5, x15
	adc	x11, x11, xzr
	mul	x16, x5, x15
	# A[0] * B[1]
	mul	x25, x4, x13
	adds	x9, x9, x25
	umulh	x26, x4, x13
	adcs	x10, x10, x26
	# A[2] * B[1]
	mul	x25, x6, x13
	adcs	x11, x11, x25
	umulh	x26, x6, x13
	adcs	x16, x16, x26
	adc	x17, x17, xzr
	# A[1] * B[2]
	mul	x25, x5, x14
	adds	x11, x11, x25
	umulh	x26, x5, x14
	adcs	x16, x16, x26
	adcs	x17, x17, xzr
	adc	x19, xzr, xzr
	# A[0] * B[2]
	mul	x25, x4, x14
	adds	x10, x10, x25
	umulh	x26, x4, x14
	adcs	x11, x11, x26
	adcs	x16, x16, xzr
	adcs	x17, x17, xzr
	adc	x19, x19, xzr
	# A[1] * B[1]
	mul	x25, x5, x13
	adds	x10, x10, x25
	umulh	x26, x5, x13
	adcs	x11, x11, x26
	# A[3] * B[1]
	mul	x25, x7, x13
	adcs	x16, x16, x25
	umulh	x26, x7, x13
	adcs	x17, x17, x26
	adc	x19, x19, xzr
	# A[2] * B[2]
	mul	x25, x6, x14
	adds	x16, x16, x25
	umulh	x26, x6, x14
	adcs	x17, x17, x26
	# A[3] * B[3]
	mul	x25, x7, x15
	adcs	x19, x19, x25
	umulh	x20, x7, x15
	adc	x20, x20, xzr
	# A[0] * B[3]
	mul	x25, x4, x15
	adds	x11, x11, x25
	umulh	x26, x4, x15
	adcs	x16, x16, x26
	# A[2] * B[3]
	mul	x25, x6, x15
	adcs	x17, x17, x25
	umulh	x26, x6, x15
	adcs	x19, x19, x26
	adc	x20, x20, xzr
	# A[3] * B[0]
	mul	x25, x7, x12
	adds	x11, x11, x25
	umulh	x26, x7, x12
	adcs	x16, x16, x26
	# A[3] * B[2]
	mul	x25, x7, x14
	adcs	x17, x17, x25
	umulh	x26, x7, x14
	adcs	x19, x19, x26
	adc	x20, x20, xzr
	/* Same fold as above, on x8..x11 / x16,x17,x19,x20. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x20
	adds	x11, x11, x26
	umulh	x27, x25, x20
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x11, #63
	mul	x27, x27, x25
	and	x11, x11, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x16
	adds	x8, x8, x26
	umulh	x16, x25, x16
	mul	x26, x25, x17
	adcs	x9, x9, x26
	umulh	x17, x25, x17
	mul	x26, x25, x19
	adcs	x10, x10, x26
	umulh	x19, x25, x19
	adc	x11, x11, xzr
	#  Add high product results in
	adds	x8, x8, x27
	adcs	x9, x9, x16
	adcs	x10, x10, x17
	adc	x11, x11, x19
	/* d: x8..x11 is doubled in place. x28 is all ones when bit 255 of the
	 * doubled value is set; in that case 2^255 - 19 is subtracted (limbs
	 * x25, x28, x28, x26). */
	# Double
	adds	x8, x8, x8
	adcs	x9, x9, x9
	adcs	x10, x10, x10
	adc	x11, x11, x11
	mov	x25, #-19
	asr	x28, x11, #63
	#   Mask the modulus
	and	x25, x28, x25
	and	x26, x28, #0x7fffffffffffffff
	#   Sub modulus (if overflow)
	subs	x8, x8, x25
	sbcs	x9, x9, x28
	sbcs	x10, x10, x28
	sbc	x11, x11, x26
	/* x3 = x1 = out + 96, x0 = out + 64 */
	mov	x3, x0
	sub	x2, x0, #32
	mov	x1, x0
	sub	x0, x0, #32
	/* Reload c from out[96]; x21..x24 = d + c, stored to out[64] below */
	# Add
	ldp	x4, x5, [x3]
	ldp	x6, x7, [x3, #16]
	adds	x21, x8, x4
	adcs	x22, x9, x5
	adcs	x23, x10, x6
	adcs	x24, x11, x7
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x24, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x21, x21, x25
	adcs	x22, x22, xzr
	and	x24, x24, #0x7fffffffffffffff
	adcs	x23, x23, xzr
	adc	x24, x24, xzr
	/* x12..x15 = d - c, stored to out[96] below */
	# Sub
	subs	x12, x8, x4
	sbcs	x13, x9, x5
	sbcs	x14, x10, x6
	sbcs	x15, x11, x7
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	stp	x21, x22, [x0]
	stp	x23, x24, [x0, #16]
	stp	x12, x13, [x1]
	stp	x14, x15, [x1, #16]
	/* Restore the saved registers and release the 144-byte frame. */
	ldp	x17, x19, [x29, #56]
	ldp	x20, x21, [x29, #72]
	ldp	x22, x23, [x29, #88]
	ldp	x24, x25, [x29, #104]
	ldp	x26, x27, [x29, #120]
	ldr	x28, [x29, #136]
	ldp	x29, x30, [sp], #0x90
	ret
#ifndef __APPLE__
	.size	ge_add,.-ge_add
#endif /* __APPLE__ */
/* ge_sub
 *
 * Straight-line routine taking three pointers in x0, x1 and x2. Every value
 * it touches is a 256-bit number held as four 64-bit limbs (32 bytes), and
 * every operation folds bits at and above 2^255 back in multiplied by 19
 * (2^256 is folded as 38), i.e. arithmetic modulo 2^255 - 19.
 *
 *   x0 - output; 32-byte values are written at byte offsets 0, 32, 64, 96.
 *   x1 - input;  32-byte values are read at byte offsets 0, 32, 64, 96.
 *   x2 - input;  32-byte values are read at byte offsets 0, 32, 64, 96.
 *
 * Writing out[n], in1[n], in2[n] for the value at byte offset n:
 *   a       = (in1[32] + in1[0]) * in2[32]
 *   b       = (in1[32] - in1[0]) * in2[0]
 *   out[0]  = a - b
 *   out[32] = a + b
 *   c       = in1[96] * in2[96]
 *   d       = 2 * in1[64] * in2[64]
 *   out[64] = d - c
 *   out[96] = d + c
 *
 * The output area is also used for intermediates: a, b and c are stored to
 * out[0], out[32] and out[96] before the final values overwrite them, and c
 * is reloaded from out[96]. The products a and c get an extra
 * "Reduce if top bit set" step that clears bit 255 before they are stored.
 *
 * NOTE(review): the offsets presumably map to the X/Y/Z/T members of the
 * point structures and to the members of a cached point - confirm against
 * the C declarations.
 * NOTE(review): in the Sub steps the correction is formed as -19 * x28 in a
 * single 64-bit register and subtracted with zero borrow limbs. This looks
 * like it relies on "no borrow and bit 255 of the difference set" never
 * occurring (minuend below 2^255) - confirm the input range guarantees.
 *
 * Uses a 144-byte frame. x17 and x19-x28 are saved on entry and restored on
 * exit; x3-x16 are overwritten as scratch.
 */
#ifndef __APPLE__
.text
.globl	ge_sub
.type	ge_sub,@function
.align	2
ge_sub:
#else
.section	__TEXT,__text
.globl	_ge_sub
.p2align	2
_ge_sub:
#endif /* __APPLE__ */
	stp	x29, x30, [sp, #-144]!
	add	x29, sp, #0
	/* Save x17 and x19-x28 in the frame; restored before ret. */
	stp	x17, x19, [x29, #56]
	stp	x20, x21, [x29, #72]
	stp	x22, x23, [x29, #88]
	stp	x24, x25, [x29, #104]
	stp	x26, x27, [x29, #120]
	str	x28, [x29, #136]
	/* Spill the three pointer arguments so they can be reloaded later. */
	str	x0, [x29, #16]
	str	x1, [x29, #24]
	str	x2, [x29, #32]
	/* x3 = in1 + 0, x2 = in1 + 32, x1 = out + 32 */
	mov	x3, x1
	add	x2, x1, #32
	add	x1, x0, #32
	/* x16,x17,x19,x20 = in1[32] + in1[0] */
	# Add
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	ldp	x4, x5, [x3]
	ldp	x6, x7, [x3, #16]
	adds	x16, x8, x4
	adcs	x17, x9, x5
	adcs	x19, x10, x6
	adcs	x20, x11, x7
	/* x28 = (carry out : bit 63 of top limb) = sum bits at and above 2^255;
	 * 19 * x28 is added back and bit 255 is cleared. */
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x20, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x16, x16, x25
	adcs	x17, x17, xzr
	and	x20, x20, #0x7fffffffffffffff
	adcs	x19, x19, xzr
	adc	x20, x20, xzr
	/* x12..x15 = in1[32] - in1[0] */
	# Sub
	subs	x12, x8, x4
	sbcs	x13, x9, x5
	sbcs	x14, x10, x6
	sbcs	x15, x11, x7
	/* x28 = (borrow mask : bit 63 of top limb); x25 = -19 * x28 is
	 * subtracted and bit 255 is cleared. */
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	/* x2 = in2 + 32, x1 = out + 0 */
	ldr	x2, [x29, #32]
	add	x2, x2, #32
	mov	x1, x0
	/* a: 512-bit product of (x16,x17,x19,x20) and in2[32];
	 * low half in x21..x24, high half in x4..x7. */
	# Multiply
	ldp	x8, x9, [x2]
	ldp	x10, x11, [x2, #16]
	# A[0] * B[0]
	umulh	x22, x16, x8
	mul	x21, x16, x8
	# A[2] * B[0]
	umulh	x24, x19, x8
	mul	x23, x19, x8
	# A[1] * B[0]
	mul	x25, x17, x8
	adds	x22, x22, x25
	umulh	x26, x17, x8
	adcs	x23, x23, x26
	# A[1] * B[3]
	umulh	x5, x17, x11
	adc	x24, x24, xzr
	mul	x4, x17, x11
	# A[0] * B[1]
	mul	x25, x16, x9
	adds	x22, x22, x25
	umulh	x26, x16, x9
	adcs	x23, x23, x26
	# A[2] * B[1]
	mul	x25, x19, x9
	adcs	x24, x24, x25
	umulh	x26, x19, x9
	adcs	x4, x4, x26
	adc	x5, x5, xzr
	# A[1] * B[2]
	mul	x25, x17, x10
	adds	x24, x24, x25
	umulh	x26, x17, x10
	adcs	x4, x4, x26
	adcs	x5, x5, xzr
	adc	x6, xzr, xzr
	# A[0] * B[2]
	mul	x25, x16, x10
	adds	x23, x23, x25
	umulh	x26, x16, x10
	adcs	x24, x24, x26
	adcs	x4, x4, xzr
	adcs	x5, x5, xzr
	adc	x6, x6, xzr
	# A[1] * B[1]
	mul	x25, x17, x9
	adds	x23, x23, x25
	umulh	x26, x17, x9
	adcs	x24, x24, x26
	# A[3] * B[1]
	mul	x25, x20, x9
	adcs	x4, x4, x25
	umulh	x26, x20, x9
	adcs	x5, x5, x26
	adc	x6, x6, xzr
	# A[2] * B[2]
	mul	x25, x19, x10
	adds	x4, x4, x25
	umulh	x26, x19, x10
	adcs	x5, x5, x26
	# A[3] * B[3]
	mul	x25, x20, x11
	adcs	x6, x6, x25
	umulh	x7, x20, x11
	adc	x7, x7, xzr
	# A[0] * B[3]
	mul	x25, x16, x11
	adds	x24, x24, x25
	umulh	x26, x16, x11
	adcs	x4, x4, x26
	# A[2] * B[3]
	mul	x25, x19, x11
	adcs	x5, x5, x25
	umulh	x26, x19, x11
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	# A[3] * B[0]
	mul	x25, x20, x8
	adds	x24, x24, x25
	umulh	x26, x20, x8
	adcs	x4, x4, x26
	# A[3] * B[2]
	mul	x25, x20, x10
	adcs	x5, x5, x25
	umulh	x26, x20, x10
	adcs	x6, x6, x26
	adc	x7, x7, xzr
	/* Fold the high half into the low half: the top limb times 38 goes
	 * into limb 3 first, the bits of that sum at and above 2^255 become
	 * x27 * 19, then the remaining high limbs times 38 are added in. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x7
	adds	x24, x24, x26
	umulh	x27, x25, x7
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x24, #63
	mul	x27, x27, x25
	and	x24, x24, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x4
	adds	x21, x21, x26
	umulh	x4, x25, x4
	mul	x26, x25, x5
	adcs	x22, x22, x26
	umulh	x5, x25, x5
	mul	x26, x25, x6
	adcs	x23, x23, x26
	umulh	x6, x25, x6
	adc	x24, x24, xzr
	#  Add high product results in
	adds	x21, x21, x27
	adcs	x22, x22, x4
	adcs	x23, x23, x5
	adc	x24, x24, x6
	/* x26 = 19 when bit 63 of the top limb is set, else 0; add it and
	 * clear that bit. */
	# Reduce if top bit set
	mov	x25, #19
	and	x26, x25, x24, asr 63
	adds	x21, x21, x26
	adcs	x22, x22, xzr
	and	x24, x24, #0x7fffffffffffffff
	adcs	x23, x23, xzr
	adc	x24, x24, xzr
	/* Intermediate a goes to out[0]; it also stays live in x21..x24. */
	# Store
	stp	x21, x22, [x0]
	stp	x23, x24, [x0, #16]
	/* x2 = in2 + 0, x0 = x1 = out + 32 */
	sub	x2, x2, #32
	add	x1, x0, #32
	add	x0, x0, #32
	/* b: product of (x12..x15) and in2[0];
	 * low half in x4..x7, high half in x8..x11. */
	# Multiply
	ldp	x16, x17, [x2]
	ldp	x19, x20, [x2, #16]
	# A[0] * B[0]
	umulh	x5, x12, x16
	mul	x4, x12, x16
	# A[2] * B[0]
	umulh	x7, x14, x16
	mul	x6, x14, x16
	# A[1] * B[0]
	mul	x25, x13, x16
	adds	x5, x5, x25
	umulh	x26, x13, x16
	adcs	x6, x6, x26
	# A[1] * B[3]
	umulh	x9, x13, x20
	adc	x7, x7, xzr
	mul	x8, x13, x20
	# A[0] * B[1]
	mul	x25, x12, x17
	adds	x5, x5, x25
	umulh	x26, x12, x17
	adcs	x6, x6, x26
	# A[2] * B[1]
	mul	x25, x14, x17
	adcs	x7, x7, x25
	umulh	x26, x14, x17
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x13, x19
	adds	x7, x7, x25
	umulh	x26, x13, x19
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x12, x19
	adds	x6, x6, x25
	umulh	x26, x12, x19
	adcs	x7, x7, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x13, x17
	adds	x6, x6, x25
	umulh	x26, x13, x17
	adcs	x7, x7, x26
	# A[3] * B[1]
	mul	x25, x15, x17
	adcs	x8, x8, x25
	umulh	x26, x15, x17
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x14, x19
	adds	x8, x8, x25
	umulh	x26, x14, x19
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x15, x20
	adcs	x10, x10, x25
	umulh	x11, x15, x20
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x12, x20
	adds	x7, x7, x25
	umulh	x26, x12, x20
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x14, x20
	adcs	x9, x9, x25
	umulh	x26, x14, x20
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x15, x16
	adds	x7, x7, x25
	umulh	x26, x15, x16
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x15, x19
	adcs	x9, x9, x25
	umulh	x26, x15, x19
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	/* Same fold as for the first product, on x4..x7 / x8..x11. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x7, x7, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x7, #63
	mul	x27, x27, x25
	and	x7, x7, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x4, x4, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x5, x5, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x6, x6, x26
	umulh	x10, x25, x10
	adc	x7, x7, xzr
	#  Add high product results in
	adds	x4, x4, x27
	adcs	x5, x5, x8
	adcs	x6, x6, x9
	adc	x7, x7, x10
	/* Intermediate b goes to out[32]; it also stays live in x4..x7. */
	# Store
	stp	x4, x5, [x0]
	stp	x6, x7, [x0, #16]
	/* x0 = out + 32, x1 = out + 0 */
	mov	x3, x0
	sub	x2, x0, #32
	sub	x1, x0, #32
	/* x8..x11 = a + b, stored to out[32] below */
	# Add
	adds	x8, x21, x4
	adcs	x9, x22, x5
	adcs	x10, x23, x6
	adcs	x11, x24, x7
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x11, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x8, x8, x25
	adcs	x9, x9, xzr
	and	x11, x11, #0x7fffffffffffffff
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	/* x12..x15 = a - b, stored to out[0] below */
	# Sub
	subs	x12, x21, x4
	sbcs	x13, x22, x5
	sbcs	x14, x23, x6
	sbcs	x15, x24, x7
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x12, x12, x25
	sbcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	sbcs	x14, x14, xzr
	sbc	x15, x15, xzr
	stp	x8, x9, [x0]
	stp	x10, x11, [x0, #16]
	stp	x12, x13, [x1]
	stp	x14, x15, [x1, #16]
	/* x1 = in1 + 96, x2 = in2 + 96, x0 = out + 96 */
	ldr	x1, [x29, #24]
	ldr	x2, [x29, #32]
	add	x2, x2, #0x60
	add	x1, x1, #0x60
	add	x0, x0, #0x40
	/* c: product of in1[96] and in2[96];
	 * low half in x16,x17,x19,x20, high half in x8..x11. */
	# Multiply
	ldp	x21, x22, [x1]
	ldp	x23, x24, [x1, #16]
	ldp	x4, x5, [x2]
	ldp	x6, x7, [x2, #16]
	# A[0] * B[0]
	umulh	x17, x21, x4
	mul	x16, x21, x4
	# A[2] * B[0]
	umulh	x20, x23, x4
	mul	x19, x23, x4
	# A[1] * B[0]
	mul	x25, x22, x4
	adds	x17, x17, x25
	umulh	x26, x22, x4
	adcs	x19, x19, x26
	# A[1] * B[3]
	umulh	x9, x22, x7
	adc	x20, x20, xzr
	mul	x8, x22, x7
	# A[0] * B[1]
	mul	x25, x21, x5
	adds	x17, x17, x25
	umulh	x26, x21, x5
	adcs	x19, x19, x26
	# A[2] * B[1]
	mul	x25, x23, x5
	adcs	x20, x20, x25
	umulh	x26, x23, x5
	adcs	x8, x8, x26
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x25, x22, x6
	adds	x20, x20, x25
	umulh	x26, x22, x6
	adcs	x8, x8, x26
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x25, x21, x6
	adds	x19, x19, x25
	umulh	x26, x21, x6
	adcs	x20, x20, x26
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x25, x22, x5
	adds	x19, x19, x25
	umulh	x26, x22, x5
	adcs	x20, x20, x26
	# A[3] * B[1]
	mul	x25, x24, x5
	adcs	x8, x8, x25
	umulh	x26, x24, x5
	adcs	x9, x9, x26
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x25, x23, x6
	adds	x8, x8, x25
	umulh	x26, x23, x6
	adcs	x9, x9, x26
	# A[3] * B[3]
	mul	x25, x24, x7
	adcs	x10, x10, x25
	umulh	x11, x24, x7
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x25, x21, x7
	adds	x20, x20, x25
	umulh	x26, x21, x7
	adcs	x8, x8, x26
	# A[2] * B[3]
	mul	x25, x23, x7
	adcs	x9, x9, x25
	umulh	x26, x23, x7
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x25, x24, x4
	adds	x20, x20, x25
	umulh	x26, x24, x4
	adcs	x8, x8, x26
	# A[3] * B[2]
	mul	x25, x24, x6
	adcs	x9, x9, x25
	umulh	x26, x24, x6
	adcs	x10, x10, x26
	adc	x11, x11, xzr
	/* Same fold as above, on x16,x17,x19,x20 / x8..x11. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x11
	adds	x20, x20, x26
	umulh	x27, x25, x11
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x20, #63
	mul	x27, x27, x25
	and	x20, x20, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x8
	adds	x16, x16, x26
	umulh	x8, x25, x8
	mul	x26, x25, x9
	adcs	x17, x17, x26
	umulh	x9, x25, x9
	mul	x26, x25, x10
	adcs	x19, x19, x26
	umulh	x10, x25, x10
	adc	x20, x20, xzr
	#  Add high product results in
	adds	x16, x16, x27
	adcs	x17, x17, x8
	adcs	x19, x19, x9
	adc	x20, x20, x10
	/* x26 = 19 when bit 63 of the top limb is set, else 0; add it and
	 * clear that bit. */
	# Reduce if top bit set
	mov	x25, #19
	and	x26, x25, x20, asr 63
	adds	x16, x16, x26
	adcs	x17, x17, xzr
	and	x20, x20, #0x7fffffffffffffff
	adcs	x19, x19, xzr
	adc	x20, x20, xzr
	/* Park c in out[96]: x16..x20 are reused by the next multiply and c is
	 * reloaded from memory for the final add/sub. */
	# Store
	stp	x16, x17, [x0]
	stp	x19, x20, [x0, #16]
	/* x3 = in2 + 64, x2 = in1 + 64, x1 = out + 64 */
	sub	x3, x2, #32
	sub	x2, x1, #32
	sub	x1, x0, #32
	/* Product of in1[64] and in2[64];
	 * low half in x8..x11, high half in x16,x17,x19,x20. */
	# Multiply
	ldp	x4, x5, [x2]
	ldp	x6, x7, [x2, #16]
	ldp	x12, x13, [x3]
	ldp	x14, x15, [x3, #16]
	# A[0] * B[0]
	umulh	x9, x4, x12
	mul	x8, x4, x12
	# A[2] * B[0]
	umulh	x11, x6, x12
	mul	x10, x6, x12
	# A[1] * B[0]
	mul	x25, x5, x12
	adds	x9, x9, x25
	umulh	x26, x5, x12
	adcs	x10, x10, x26
	# A[1] * B[3]
	umulh	x17, x5, x15
	adc	x11, x11, xzr
	mul	x16, x5, x15
	# A[0] * B[1]
	mul	x25, x4, x13
	adds	x9, x9, x25
	umulh	x26, x4, x13
	adcs	x10, x10, x26
	# A[2] * B[1]
	mul	x25, x6, x13
	adcs	x11, x11, x25
	umulh	x26, x6, x13
	adcs	x16, x16, x26
	adc	x17, x17, xzr
	# A[1] * B[2]
	mul	x25, x5, x14
	adds	x11, x11, x25
	umulh	x26, x5, x14
	adcs	x16, x16, x26
	adcs	x17, x17, xzr
	adc	x19, xzr, xzr
	# A[0] * B[2]
	mul	x25, x4, x14
	adds	x10, x10, x25
	umulh	x26, x4, x14
	adcs	x11, x11, x26
	adcs	x16, x16, xzr
	adcs	x17, x17, xzr
	adc	x19, x19, xzr
	# A[1] * B[1]
	mul	x25, x5, x13
	adds	x10, x10, x25
	umulh	x26, x5, x13
	adcs	x11, x11, x26
	# A[3] * B[1]
	mul	x25, x7, x13
	adcs	x16, x16, x25
	umulh	x26, x7, x13
	adcs	x17, x17, x26
	adc	x19, x19, xzr
	# A[2] * B[2]
	mul	x25, x6, x14
	adds	x16, x16, x25
	umulh	x26, x6, x14
	adcs	x17, x17, x26
	# A[3] * B[3]
	mul	x25, x7, x15
	adcs	x19, x19, x25
	umulh	x20, x7, x15
	adc	x20, x20, xzr
	# A[0] * B[3]
	mul	x25, x4, x15
	adds	x11, x11, x25
	umulh	x26, x4, x15
	adcs	x16, x16, x26
	# A[2] * B[3]
	mul	x25, x6, x15
	adcs	x17, x17, x25
	umulh	x26, x6, x15
	adcs	x19, x19, x26
	adc	x20, x20, xzr
	# A[3] * B[0]
	mul	x25, x7, x12
	adds	x11, x11, x25
	umulh	x26, x7, x12
	adcs	x16, x16, x26
	# A[3] * B[2]
	mul	x25, x7, x14
	adcs	x17, x17, x25
	umulh	x26, x7, x14
	adcs	x19, x19, x26
	adc	x20, x20, xzr
	/* Same fold as above, on x8..x11 / x16,x17,x19,x20. */
	# Reduce
	mov	x25, #38
	mul	x26, x25, x20
	adds	x11, x11, x26
	umulh	x27, x25, x20
	adc	x27, x27, xzr
	mov	x25, #19
	extr	x27, x27, x11, #63
	mul	x27, x27, x25
	and	x11, x11, #0x7fffffffffffffff
	mov	x25, #38
	mul	x26, x25, x16
	adds	x8, x8, x26
	umulh	x16, x25, x16
	mul	x26, x25, x17
	adcs	x9, x9, x26
	umulh	x17, x25, x17
	mul	x26, x25, x19
	adcs	x10, x10, x26
	umulh	x19, x25, x19
	adc	x11, x11, xzr
	#  Add high product results in
	adds	x8, x8, x27
	adcs	x9, x9, x16
	adcs	x10, x10, x17
	adc	x11, x11, x19
	/* d: x8..x11 is doubled in place. x28 is all ones when bit 255 of the
	 * doubled value is set; in that case 2^255 - 19 is subtracted (limbs
	 * x25, x28, x28, x26). */
	# Double
	adds	x8, x8, x8
	adcs	x9, x9, x9
	adcs	x10, x10, x10
	adc	x11, x11, x11
	mov	x25, #-19
	asr	x28, x11, #63
	#   Mask the modulus
	and	x25, x28, x25
	and	x26, x28, #0x7fffffffffffffff
	#   Sub modulus (if overflow)
	subs	x8, x8, x25
	sbcs	x9, x9, x28
	sbcs	x10, x10, x28
	sbc	x11, x11, x26
	/* x3 = x0 = out + 96; x1 is still out + 64 */
	mov	x3, x0
	sub	x2, x0, #32
	/* Reload c from out[96]; x12..x15 = d + c, stored to out[96] below */
	# Add
	ldp	x4, x5, [x3]
	ldp	x6, x7, [x3, #16]
	adds	x12, x8, x4
	adcs	x13, x9, x5
	adcs	x14, x10, x6
	adcs	x15, x11, x7
	cset	x28, cs
	mov	x25, #19
	extr	x28, x28, x15, #63
	mul	x25, x28, x25
	#   Sub modulus (if overflow)
	adds	x12, x12, x25
	adcs	x13, x13, xzr
	and	x15, x15, #0x7fffffffffffffff
	adcs	x14, x14, xzr
	adc	x15, x15, xzr
	/* x21..x24 = d - c, stored to out[64] below */
	# Sub
	subs	x21, x8, x4
	sbcs	x22, x9, x5
	sbcs	x23, x10, x6
	sbcs	x24, x11, x7
	csetm	x28, cc
	mov	x25, #-19
	extr	x28, x28, x24, #63
	mul	x25, x28, x25
	#   Add modulus (if underflow)
	subs	x21, x21, x25
	sbcs	x22, x22, xzr
	and	x24, x24, #0x7fffffffffffffff
	sbcs	x23, x23, xzr
	sbc	x24, x24, xzr
	stp	x12, x13, [x0]
	stp	x14, x15, [x0, #16]
	stp	x21, x22, [x1]
	stp	x23, x24, [x1, #16]
	/* Restore the saved registers and release the 144-byte frame. */
	ldp	x17, x19, [x29, #56]
	ldp	x20, x21, [x29, #72]
	ldp	x22, x23, [x29, #88]
	ldp	x24, x25, [x29, #104]
	ldp	x26, x27, [x29, #120]
	ldr	x28, [x29, #136]
	ldp	x29, x30, [sp], #0x90
	ret
#ifndef __APPLE__
	.size	ge_sub,.-ge_sub
#endif /* __APPLE__ */
/* sc_reduce
 *
 * Reduces a 512-bit value, held as eight little-endian 64-bit words, using
 * the order 2^252 + 0x14def9dea2f79cd65812631a5cf5d3ed (the value spelled
 * out by the constants in the final "Conditional sub order" step).
 *
 * In:   x0 = pointer to the eight input words (64 bytes, all read)
 * Out:  the four result words are stored back to the first 32 bytes at x0;
 *       the top 4 bits of the last result word are always cleared
 *
 * Uses x1-x17, x19, x20 and x23 as working registers. x17 and x19-x23 are
 * saved in the 64-byte frame on entry and restored before returning.
 *
 * The multiplier constants 0xa7ed9ce5a30a2c13 and 0xeb2106215d086329 are the
 * 64-bit twos complement forms of the values the inline comments label
 * -5812631a5cf5d3ed and -14def9dea2f79cd7.
 */
#ifndef __APPLE__
.text
.globl	sc_reduce
.type	sc_reduce,@function
.align	2
sc_reduce:
#else
.section	__TEXT,__text
.globl	_sc_reduce
.p2align	2
_sc_reduce:
#endif /* __APPLE__ */
	# Prologue - 64-byte frame, x29 and x30 at offset 0, saved registers above
	stp	x29, x30, [sp, #-64]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #16]
	stp	x20, x21, [x29, #32]
	stp	x22, x23, [x29, #48]
	# Load the eight input words, x2 = word 0 (lowest) up to x9 = word 7
	ldp	x2, x3, [x0]
	ldp	x4, x5, [x0, #16]
	ldp	x6, x7, [x0, #32]
	ldp	x8, x9, [x0, #48]
	# x23 = top byte of word 7 (input bits 504..511)
	lsr	x23, x9, #56
	# Shift x9:x6 left by 4, pulling in the top 4 bits of the word below,
	# so that x6..x9 start at input bit 252
	lsl	x9, x9, #4
	orr	x9, x9, x8, lsr 60
	lsl	x8, x8, #4
	orr	x8, x8, x7, lsr 60
	lsl	x7, x7, #4
	orr	x7, x7, x6, lsr 60
	lsl	x6, x6, #4
	mov	x1, #15
	orr	x6, x6, x5, lsr 60
	# Clear the top 4 bits of x5 (x2..x5 keep input bits 0..251) and of x9
	bic	x5, x5, x1, lsl 60
	bic	x9, x9, x1, lsl 60
	# Add order times bits 504..511
	# x11 = 0xa7ed9ce5a30a2c13, x13 = 0xeb2106215d086329
	mov	x11, #0x2c13
	movk	x11, #0xa30a, lsl 16
	movk	x11, #0x9ce5, lsl 32
	movk	x11, #0xa7ed, lsl 48
	mov	x13, #0x6329
	movk	x13, #0x5d08, lsl 16
	movk	x13, #0x621, lsl 32
	movk	x13, #0xeb21, lsl 48
	# x11:x10 = x23 * first constant, x13:x12 = x23 * second constant
	mul	x10, x23, x11
	umulh	x11, x23, x11
	mul	x12, x23, x13
	umulh	x13, x23, x13
	# Both products are added into x9:x6 (the second one word higher),
	# then x23 itself is subtracted two words up
	adds	x6, x6, x10
	adcs	x7, x7, x11
	adcs	x8, x8, xzr
	adc	x9, x9, xzr
	adds	x7, x7, x12
	adcs	x8, x8, x13
	adc	x9, x9, xzr
	subs	x8, x8, x23
	sbc	x9, x9, xzr
	# Sub product of top 4 words and order
	mov	x1, #0x2c13
	movk	x1, #0xa30a, lsl 16
	movk	x1, #0x9ce5, lsl 32
	movk	x1, #0xa7ed, lsl 48
	# Products of x6..x9 with 0xa7ed9ce5a30a2c13, accumulated into x2..x5
	# with the overflow collected in x19
	mul	x10, x6, x1
	umulh	x11, x6, x1
	mul	x12, x7, x1
	umulh	x13, x7, x1
	mul	x14, x8, x1
	umulh	x15, x8, x1
	mul	x16, x9, x1
	umulh	x17, x9, x1
	adds	x2, x2, x10
	adcs	x3, x3, x11
	adcs	x4, x4, x14
	adcs	x5, x5, x15
	adc	x19, xzr, xzr
	adds	x3, x3, x12
	adcs	x4, x4, x13
	adcs	x5, x5, x16
	adc	x19, x19, x17
	mov	x1, #0x6329
	movk	x1, #0x5d08, lsl 16
	movk	x1, #0x621, lsl 32
	movk	x1, #0xeb21, lsl 48
	# Products of x6..x9 with 0xeb2106215d086329, accumulated one word
	# higher (x3, x4, x5, x19) with the overflow collected in x20
	mul	x10, x6, x1
	umulh	x11, x6, x1
	mul	x12, x7, x1
	umulh	x13, x7, x1
	mul	x14, x8, x1
	umulh	x15, x8, x1
	mul	x16, x9, x1
	umulh	x17, x9, x1
	adds	x3, x3, x10
	adcs	x4, x4, x11
	adcs	x5, x5, x14
	adcs	x19, x19, x15
	adc	x20, xzr, xzr
	adds	x4, x4, x12
	adcs	x5, x5, x13
	adcs	x19, x19, x16
	adc	x20, x20, x17
	# Subtract x6..x9 two words up. x6 and x7 are overwritten with the new
	# top two words, leaving a six-word value in x2..x7
	subs	x4, x4, x6
	sbcs	x5, x5, x7
	sbcs	x6, x19, x8
	sbc	x7, x20, x9
	# x23 = x7 >> 57 (arithmetic shift), used as the AND mask below
	# NOTE(review): presumably always 0 or all ones here - confirm range of x7
	asr	x23, x7, #57
	#   Conditionally subtract order starting at bit 125
	# x10, x11, x12, x13 = the order shifted left by 125 bits, laid out over
	# words 1, 2, 3 and 5 (word 4 of the shifted value is zero)
	mov	x10, xzr
	mov	x13, xzr
	mov	x11, #0xba7d
	movk	x11, #0x4b9e, lsl 16
	movk	x11, #0x4c63, lsl 32
	movk	x11, #0xcb02, lsl 48
	mov	x12, #0xf39a
	movk	x12, #0xd45e, lsl 16
	movk	x12, #0xdf3b, lsl 32
	movk	x12, #0x29b, lsl 48
	movk	x10, #0xa000, lsl 48
	movk	x13, #0x200, lsl 48
	and	x10, x10, x23
	and	x11, x11, x23
	and	x12, x12, x23
	and	x13, x13, x23
	# The masked words are added into x3..x7
	adds	x3, x3, x10
	adcs	x4, x4, x11
	adcs	x5, x5, x12
	adcs	x6, x6, xzr
	adc	x7, x7, x13
	#   Move bits 252-376 to own registers
	# x7:x6 shifted left by 4 with the top 4 bits of x5 moved in, after
	# which x5 keeps only its low 60 bits
	lsl	x7, x7, #4
	orr	x7, x7, x6, lsr 60
	lsl	x6, x6, #4
	mov	x23, #15
	orr	x6, x6, x5, lsr 60
	bic	x5, x5, x23, lsl 60
	# Sub product of top 2 words and order
	#   * -5812631a5cf5d3ed
	mov	x1, #0x2c13
	movk	x1, #0xa30a, lsl 16
	movk	x1, #0x9ce5, lsl 32
	movk	x1, #0xa7ed, lsl 48
	# Accumulated into x2, x3 with the overflow collected in x19
	mul	x10, x6, x1
	umulh	x11, x6, x1
	mul	x12, x7, x1
	umulh	x13, x7, x1
	adds	x2, x2, x10
	adcs	x3, x3, x11
	adc	x19, xzr, xzr
	adds	x3, x3, x12
	adc	x19, x19, x13
	#   * -14def9dea2f79cd7
	mov	x1, #0x6329
	movk	x1, #0x5d08, lsl 16
	movk	x1, #0x621, lsl 32
	movk	x1, #0xeb21, lsl 48
	# Accumulated into x3, x4 with the overflow collected in x20
	mul	x10, x6, x1
	umulh	x11, x6, x1
	mul	x12, x7, x1
	umulh	x13, x7, x1
	adds	x3, x3, x10
	adcs	x4, x4, x11
	adc	x20, xzr, xzr
	adds	x4, x4, x12
	adc	x20, x20, x13
	#   Add overflows at 2 * 64
	mov	x1, #15
	bic	x5, x5, x1, lsl 60
	adds	x4, x4, x19
	adc	x5, x5, x20
	#   Subtract top at 2 * 64
	subs	x4, x4, x6
	sbcs	x5, x5, x7
	# x1 = 0 when the subtraction above did not borrow, all ones when it did
	sbc	x1, x1, x1
	#   Conditional sub order
	# x10, x11 = low two words of the order, masked by x1. Bit 60 of x1
	# supplies the 2^252 term for the top word. The masked order is added.
	mov	x10, #0xd3ed
	movk	x10, #0x5cf5, lsl 16
	movk	x10, #0x631a, lsl 32
	movk	x10, #0x5812, lsl 48
	mov	x11, #0x9cd6
	movk	x11, #0xa2f7, lsl 16
	movk	x11, #0xf9de, lsl 32
	movk	x11, #0x14de, lsl 48
	and	x10, x10, x1
	and	x11, x11, x1
	adds	x2, x2, x10
	adcs	x3, x3, x11
	and	x1, x1, #0x1000000000000000
	adcs	x4, x4, xzr
	mov	x23, #15
	adc	x5, x5, x1
	# Clear the top 4 bits of the last result word
	bic	x5, x5, x23, lsl 60
	# Store result
	stp	x2, x3, [x0]
	stp	x4, x5, [x0, #16]
	# Restore the saved registers and release the frame
	ldp	x17, x19, [x29, #16]
	ldp	x20, x21, [x29, #32]
	ldp	x22, x23, [x29, #48]
	ldp	x29, x30, [sp], #0x40
	ret
#ifndef __APPLE__
	.size	sc_reduce,.-sc_reduce
#endif /* __APPLE__ */
/* sc_muladd
 *
 * Computes a * b + c on four-word little-endian operands and reduces the
 * eight-word sum using the order 2^252 + 0x14def9dea2f79cd65812631a5cf5d3ed
 * (the value spelled out by the constants in the final "Conditional sub
 * order" step).
 *
 * In:   x0 = pointer to the 32-byte result buffer (written only)
 *       x1 = pointer to a (four 64-bit words read)
 *       x2 = pointer to b (four 64-bit words read)
 *       x3 = pointer to c (four 64-bit words read)
 * Out:  four result words stored at x0; the top 4 bits of the last word
 *       are always cleared
 *
 * Uses x4-x17 and x19-x26 as working registers. x17 and x19-x26 are saved
 * in the 96-byte frame on entry and restored before returning.
 *
 * The multiplier constants 0xa7ed9ce5a30a2c13 and 0xeb2106215d086329 are the
 * 64-bit twos complement forms of the values the inline comments label
 * -5812631a5cf5d3ed and -14def9dea2f79cd7.
 */
#ifndef __APPLE__
.text
.globl	sc_muladd
.type	sc_muladd,@function
.align	2
sc_muladd:
#else
.section	__TEXT,__text
.globl	_sc_muladd
.p2align	2
_sc_muladd:
#endif /* __APPLE__ */
	# Prologue - 96-byte frame, x29 and x30 at offset 0, saved registers
	# stored from offset 24 upward
	stp	x29, x30, [sp, #-96]!
	add	x29, sp, #0
	stp	x17, x19, [x29, #24]
	stp	x20, x21, [x29, #40]
	stp	x22, x23, [x29, #56]
	stp	x24, x25, [x29, #72]
	str	x26, [x29, #88]
	# Multiply
	# A[0..3] = x12..x15 (from x1), B[0..3] = x16, x17, x19, x20 (from x2).
	# The eight-word product accumulates in x4 (lowest) to x11 (highest),
	# with x21 and x22 as scratch for each partial product.
	ldp	x12, x13, [x1]
	ldp	x14, x15, [x1, #16]
	ldp	x16, x17, [x2]
	ldp	x19, x20, [x2, #16]
	# A[0] * B[0]
	umulh	x5, x12, x16
	mul	x4, x12, x16
	# A[2] * B[0]
	umulh	x7, x14, x16
	mul	x6, x14, x16
	# A[1] * B[0]
	mul	x21, x13, x16
	adds	x5, x5, x21
	umulh	x22, x13, x16
	adcs	x6, x6, x22
	# A[1] * B[3]
	umulh	x9, x13, x20
	adc	x7, x7, xzr
	mul	x8, x13, x20
	# A[0] * B[1]
	mul	x21, x12, x17
	adds	x5, x5, x21
	umulh	x22, x12, x17
	adcs	x6, x6, x22
	# A[2] * B[1]
	mul	x21, x14, x17
	adcs	x7, x7, x21
	umulh	x22, x14, x17
	adcs	x8, x8, x22
	adc	x9, x9, xzr
	# A[1] * B[2]
	mul	x21, x13, x19
	adds	x7, x7, x21
	umulh	x22, x13, x19
	adcs	x8, x8, x22
	adcs	x9, x9, xzr
	adc	x10, xzr, xzr
	# A[0] * B[2]
	mul	x21, x12, x19
	adds	x6, x6, x21
	umulh	x22, x12, x19
	adcs	x7, x7, x22
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adc	x10, x10, xzr
	# A[1] * B[1]
	mul	x21, x13, x17
	adds	x6, x6, x21
	umulh	x22, x13, x17
	adcs	x7, x7, x22
	# A[3] * B[1]
	mul	x21, x15, x17
	adcs	x8, x8, x21
	umulh	x22, x15, x17
	adcs	x9, x9, x22
	adc	x10, x10, xzr
	# A[2] * B[2]
	mul	x21, x14, x19
	adds	x8, x8, x21
	umulh	x22, x14, x19
	adcs	x9, x9, x22
	# A[3] * B[3]
	mul	x21, x15, x20
	adcs	x10, x10, x21
	umulh	x11, x15, x20
	adc	x11, x11, xzr
	# A[0] * B[3]
	mul	x21, x12, x20
	adds	x7, x7, x21
	umulh	x22, x12, x20
	adcs	x8, x8, x22
	# A[2] * B[3]
	mul	x21, x14, x20
	adcs	x9, x9, x21
	umulh	x22, x14, x20
	adcs	x10, x10, x22
	adc	x11, x11, xzr
	# A[3] * B[0]
	mul	x21, x15, x16
	adds	x7, x7, x21
	umulh	x22, x15, x16
	adcs	x8, x8, x22
	# A[3] * B[2]
	mul	x21, x15, x19
	adcs	x9, x9, x21
	umulh	x22, x15, x19
	adcs	x10, x10, x22
	adc	x11, x11, xzr
	# Add c to a * b
	# x12..x15 are reloaded with c and the carry is propagated through all
	# eight words of the product
	ldp	x12, x13, [x3]
	ldp	x14, x15, [x3, #16]
	adds	x4, x4, x12
	adcs	x5, x5, x13
	adcs	x6, x6, x14
	adcs	x7, x7, x15
	adcs	x8, x8, xzr
	adcs	x9, x9, xzr
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	# x25 = x11 >> 56, which is the whole top byte (bits 504..511) of the sum
	lsr	x25, x11, #56
	# Shift x11:x8 left by 4, pulling in the top 4 bits of the word below,
	# so that x8..x11 start at bit 252 of the sum
	lsl	x11, x11, #4
	orr	x11, x11, x10, lsr 60
	lsl	x10, x10, #4
	orr	x10, x10, x9, lsr 60
	lsl	x9, x9, #4
	orr	x9, x9, x8, lsr 60
	lsl	x8, x8, #4
	mov	x26, #15
	orr	x8, x8, x7, lsr 60
	# Clear the top 4 bits of x7 (x4..x7 keep bits 0..251) and of x11
	bic	x7, x7, x26, lsl 60
	bic	x11, x11, x26, lsl 60
	# Add order times bits 504..507
	# x22 = 0xa7ed9ce5a30a2c13, x24 = 0xeb2106215d086329
	mov	x22, #0x2c13
	movk	x22, #0xa30a, lsl 16
	movk	x22, #0x9ce5, lsl 32
	movk	x22, #0xa7ed, lsl 48
	mov	x24, #0x6329
	movk	x24, #0x5d08, lsl 16
	movk	x24, #0x621, lsl 32
	movk	x24, #0xeb21, lsl 48
	# x22:x21 = x25 * first constant, x24:x23 = x25 * second constant
	mul	x21, x25, x22
	umulh	x22, x25, x22
	mul	x23, x25, x24
	umulh	x24, x25, x24
	# Both products are added into x11:x8 (the second one word higher),
	# then x25 itself is subtracted two words up
	adds	x8, x8, x21
	adcs	x9, x9, x22
	adcs	x10, x10, xzr
	adc	x11, x11, xzr
	adds	x9, x9, x23
	adcs	x10, x10, x24
	adc	x11, x11, xzr
	subs	x10, x10, x25
	sbc	x11, x11, xzr
	# Sub product of top 4 words and order
	mov	x26, #0x2c13
	movk	x26, #0xa30a, lsl 16
	movk	x26, #0x9ce5, lsl 32
	movk	x26, #0xa7ed, lsl 48
	# Products of x8..x11 with 0xa7ed9ce5a30a2c13, accumulated into x4..x7
	# with the overflow collected in x12
	mul	x16, x8, x26
	umulh	x17, x8, x26
	mul	x19, x9, x26
	umulh	x20, x9, x26
	mul	x21, x10, x26
	umulh	x22, x10, x26
	mul	x23, x11, x26
	umulh	x24, x11, x26
	adds	x4, x4, x16
	adcs	x5, x5, x17
	adcs	x6, x6, x21
	adcs	x7, x7, x22
	adc	x12, xzr, xzr
	adds	x5, x5, x19
	adcs	x6, x6, x20
	adcs	x7, x7, x23
	adc	x12, x12, x24
	mov	x26, #0x6329
	movk	x26, #0x5d08, lsl 16
	movk	x26, #0x621, lsl 32
	movk	x26, #0xeb21, lsl 48
	# Products of x8..x11 with 0xeb2106215d086329, accumulated one word
	# higher (x5, x6, x7, x12) with the overflow collected in x13
	mul	x16, x8, x26
	umulh	x17, x8, x26
	mul	x19, x9, x26
	umulh	x20, x9, x26
	mul	x21, x10, x26
	umulh	x22, x10, x26
	mul	x23, x11, x26
	umulh	x24, x11, x26
	adds	x5, x5, x16
	adcs	x6, x6, x17
	adcs	x7, x7, x21
	adcs	x12, x12, x22
	adc	x13, xzr, xzr
	adds	x6, x6, x19
	adcs	x7, x7, x20
	adcs	x12, x12, x23
	adc	x13, x13, x24
	# Subtract x8..x11 two words up. x8 and x9 are overwritten with the new
	# top two words, leaving a six-word value in x4..x9
	subs	x6, x6, x8
	sbcs	x7, x7, x9
	sbcs	x8, x12, x10
	sbc	x9, x13, x11
	# x25 = x9 >> 57 (arithmetic shift), used as the AND mask below
	# NOTE(review): presumably always 0 or all ones here - confirm range of x9
	asr	x25, x9, #57
	#   Conditionally subtract order starting at bit 125
	# x16, x17, x19, x20 = the order shifted left by 125 bits, laid out over
	# words 1, 2, 3 and 5 (word 4 of the shifted value is zero)
	mov	x16, xzr
	mov	x20, xzr
	mov	x17, #0xba7d
	movk	x17, #0x4b9e, lsl 16
	movk	x17, #0x4c63, lsl 32
	movk	x17, #0xcb02, lsl 48
	mov	x19, #0xf39a
	movk	x19, #0xd45e, lsl 16
	movk	x19, #0xdf3b, lsl 32
	movk	x19, #0x29b, lsl 48
	movk	x16, #0xa000, lsl 48
	movk	x20, #0x200, lsl 48
	and	x16, x16, x25
	and	x17, x17, x25
	and	x19, x19, x25
	and	x20, x20, x25
	# The masked words are added into x5..x9
	adds	x5, x5, x16
	adcs	x6, x6, x17
	adcs	x7, x7, x19
	adcs	x8, x8, xzr
	adc	x9, x9, x20
	#   Move bits 252-376 to own registers
	# x9:x8 shifted left by 4 with the top 4 bits of x7 moved in, after
	# which x7 keeps only its low 60 bits
	lsl	x9, x9, #4
	orr	x9, x9, x8, lsr 60
	lsl	x8, x8, #4
	mov	x25, #15
	orr	x8, x8, x7, lsr 60
	bic	x7, x7, x25, lsl 60
	# Sub product of top 2 words and order
	#   * -5812631a5cf5d3ed
	mov	x26, #0x2c13
	movk	x26, #0xa30a, lsl 16
	movk	x26, #0x9ce5, lsl 32
	movk	x26, #0xa7ed, lsl 48
	# Accumulated into x4, x5 with the overflow collected in x12
	mul	x16, x8, x26
	umulh	x17, x8, x26
	mul	x19, x9, x26
	umulh	x20, x9, x26
	adds	x4, x4, x16
	adcs	x5, x5, x17
	adc	x12, xzr, xzr
	adds	x5, x5, x19
	adc	x12, x12, x20
	#   * -14def9dea2f79cd7
	mov	x26, #0x6329
	movk	x26, #0x5d08, lsl 16
	movk	x26, #0x621, lsl 32
	movk	x26, #0xeb21, lsl 48
	# Accumulated into x5, x6 with the overflow collected in x13
	mul	x16, x8, x26
	umulh	x17, x8, x26
	mul	x19, x9, x26
	umulh	x20, x9, x26
	adds	x5, x5, x16
	adcs	x6, x6, x17
	adc	x13, xzr, xzr
	adds	x6, x6, x19
	adc	x13, x13, x20
	#   Add overflows at 2 * 64
	mov	x26, #15
	bic	x7, x7, x26, lsl 60
	adds	x6, x6, x12
	adc	x7, x7, x13
	#   Subtract top at 2 * 64
	subs	x6, x6, x8
	sbcs	x7, x7, x9
	# x26 = 0 when the subtraction above did not borrow, all ones when it did
	sbc	x26, x26, x26
	#   Conditional sub order
	# x16, x17 = low two words of the order, masked by x26. Bit 60 of x26
	# supplies the 2^252 term for the top word. The masked order is added.
	mov	x16, #0xd3ed
	movk	x16, #0x5cf5, lsl 16
	movk	x16, #0x631a, lsl 32
	movk	x16, #0x5812, lsl 48
	mov	x17, #0x9cd6
	movk	x17, #0xa2f7, lsl 16
	movk	x17, #0xf9de, lsl 32
	movk	x17, #0x14de, lsl 48
	and	x16, x16, x26
	and	x17, x17, x26
	adds	x4, x4, x16
	adcs	x5, x5, x17
	and	x26, x26, #0x1000000000000000
	adcs	x6, x6, xzr
	mov	x25, #15
	adc	x7, x7, x26
	# Clear the top 4 bits of the last result word
	bic	x7, x7, x25, lsl 60
	# Store result
	stp	x4, x5, [x0]
	stp	x6, x7, [x0, #16]
	# Restore the saved registers and release the frame
	ldp	x17, x19, [x29, #24]
	ldp	x20, x21, [x29, #40]
	ldp	x22, x23, [x29, #56]
	ldp	x24, x25, [x29, #72]
	ldr	x26, [x29, #88]
	ldp	x29, x30, [sp], #0x60
	ret
#ifndef __APPLE__
	.size	sc_muladd,.-sc_muladd
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#endif /* !CURVE25519_SMALL || !ED25519_SMALL */
#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
#endif /* !WOLFSSL_ARMASM_INLINE */
#endif /* __aarch64__ */

#if defined(__linux__) && defined(__ELF__)
.section	.note.GNU-stack,"",%progbits
#endif
#endif /* WOLFSSL_ARMASM */
